diff --git a/pom.xml b/pom.xml
index 3f1d74ab3a6..d07a71cb471 100644
--- a/pom.xml
+++ b/pom.xml
@@ -184,7 +184,7 @@
             <version>0.8.13</version>
             <optional>true</optional>
         </dependency>
-       <!-- Lucene spatial -->
+        <!-- Lucene spatial -->
 
 
         <!-- START: dependencies that are shaded -->
@@ -485,7 +485,8 @@
                             <haltOnFailure>${tests.failfast}</haltOnFailure>
                             <uniqueSuiteNames>false</uniqueSuiteNames>
                             <systemProperties>
-                                <java.io.tmpdir>.</java.io.tmpdir> <!-- we use '.' since this is different per JVM-->
+                                <java.io.tmpdir>.</java.io.tmpdir>
+                                <!-- we use '.' since this is different per JVM-->
                                 <!-- RandomizedTesting library system properties -->
                                 <tests.bwc>${tests.bwc}</tests.bwc>
                                 <tests.bwc.path>${tests.bwc.path}</tests.bwc.path>
@@ -539,15 +540,15 @@
                 <version>1.7</version>
                 <executions>
                     <execution>
-                    <phase>validate</phase>
-                    <goals>
-                        <goal>run</goal>
-                    </goals>
-                    <configuration>
-                           <target>
-                               <echo>Using ${java.runtime.name} ${java.runtime.version} ${java.vendor}</echo>
-                           </target>
-                    </configuration>
+                        <phase>validate</phase>
+                        <goals>
+                            <goal>run</goal>
+                        </goals>
+                        <configuration>
+                            <target>
+                                <echo>Using ${java.runtime.name} ${java.runtime.version} ${java.vendor}</echo>
+                            </target>
+                        </configuration>
                     </execution>
                     <execution>
                         <id>invalid-patterns</id>
@@ -575,7 +576,9 @@
                                     </fileset>
                                     <map from="${basedir}${file.separator}" to="* "/>
                                 </pathconvert>
-                                <fail if="validate.patternsFound">The following files contain tabs or nocommits:${line.separator}${validate.patternsFound}</fail>
+                                <fail if="validate.patternsFound">The following files contain tabs or
+                                    nocommits:${line.separator}${validate.patternsFound}
+                                </fail>
                             </target>
                         </configuration>
                     </execution>
@@ -583,7 +586,8 @@
                         <id>tests</id>
                         <phase>test</phase>
                         <configuration>
-                            <skip>${skipTests}</skip> <!-- don't run if we skip the tests -->
+                            <skip>${skipTests}</skip>
+                            <!-- don't run if we skip the tests -->
                             <failOnError>false</failOnError>
                             <target>
                                 <property name="runtime_classpath" refid="maven.runtime.classpath"/>
@@ -597,7 +601,7 @@
                                     </classpath>
                                 </taskdef>
                                 <tophints max="${tests.topn}">
-                                    <file file="${basedir}/${execution.hint.file}" />
+                                    <file file="${basedir}/${execution.hint.file}"/>
                                 </tophints>
                             </target>
                         </configuration>
@@ -710,7 +714,7 @@
                             <shadedPattern>org.elasticsearch.common.compress</shadedPattern>
                         </relocation>
                         <relocation>
-                        <pattern>com.github.mustachejava</pattern>
+                            <pattern>com.github.mustachejava</pattern>
                             <shadedPattern>org.elasticsearch.common.mustache</shadedPattern>
                         </relocation>
                         <relocation>
@@ -1221,6 +1225,11 @@
                                 <bundledSignature>jdk-unsafe</bundledSignature>
                                 <bundledSignature>jdk-deprecated</bundledSignature>
                             </bundledSignatures>
+                            <excludes>
+                                <!-- start exclude for test GC simulation using Thread.suspend -->
+                                <exclude>org/elasticsearch/test/disruption/LongGCDisruption.class</exclude>
+                                <!-- end exclude for GC simulation  -->
+                            </excludes>
                             <signaturesFiles>
                                 <signaturesFile>test-signatures.txt</signaturesFile>
                                 <signaturesFile>all-signatures.txt</signaturesFile>
@@ -1345,219 +1354,220 @@
         </pluginManagement>
     </build>
     <profiles>
-      <!-- default profile, with randomization setting kicks in -->
-      <profile>
-        <id>default</id>
-        <activation>
-          <activeByDefault>true</activeByDefault>
-        </activation>
-        <build>
-          <plugins>
-            <plugin>
-              <groupId>com.carrotsearch.randomizedtesting</groupId>
-              <artifactId>junit4-maven-plugin</artifactId>
-              <configuration>
-                <argLine>${tests.jvm.argline}</argLine>
-              </configuration>
-            </plugin>
-            <plugin>
-              <groupId>com.mycila</groupId>
-              <artifactId>license-maven-plugin</artifactId>
-              <version>2.5</version>
-              <configuration>
-                <header>dev-tools/elasticsearch_license_header.txt</header>
-                <headerDefinitions>
-                  <headerDefinition>dev-tools/license_header_definition.xml</headerDefinition>
-                </headerDefinitions>
-                <includes>
-                  <include>src/main/java/org/elasticsearch/**/*.java</include>
-                  <include>src/test/java/org/elasticsearch/**/*.java</include>
-                </includes>
-                <excludes>
-                  <exclude>src/main/java/org/elasticsearch/common/inject/**</exclude>
-                  <!-- Guice -->
-                  <exclude>src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java</exclude>
-                  <exclude>src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java</exclude>
-                  <exclude>src/main/java/org/elasticsearch/common/lucene/search/XFilteredQuery.java</exclude>
-                  <exclude>src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java</exclude>
-                  <exclude>src/main/java/org/apache/lucene/**/X*.java</exclude>
-                  <!-- t-digest -->
-                  <exclude>src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestState.java</exclude>
-                  <exclude>src/test/java/org/elasticsearch/search/aggregations/metrics/GroupTree.java</exclude>
-                </excludes>
-              </configuration>
-                <executions>
-                    <execution>
-                        <phase>compile</phase>
-                        <goals>
-                            <goal>check</goal>
-                        </goals>
-                    </execution>
-                </executions>
-            </plugin>
-          </plugins>
-        </build>
-      </profile>
-      <!-- profile for development that doesn't check forbidden-apis, no-commit validation or license headers run with mvn -Pdev -->
-      <profile>
-        <id>dev</id>
-        <properties>
-          <validate.skip>true</validate.skip>
-        </properties>
-          <build>
-        <plugins>
-          <plugin>
-              <groupId>de.thetaphi</groupId>
-              <artifactId>forbiddenapis</artifactId>
-              <version>1.5.1</version>
-              <executions>
-                <execution>
-                    <id>check-forbidden-apis</id>
-                    <phase>none</phase>
-                </execution>
-                <execution>
-                    <id>check-forbidden-test-apis</id>
-                    <phase>none</phase>
-                </execution>
-              </executions>
-          </plugin>
-          </plugins>
-          </build>
-      </profile>
-      <!-- license profile, to generate third party license file -->
-      <profile>
-        <id>license</id>
-        <activation>
-          <property>
-            <name>license.generation</name>
-            <value>true</value>
-          </property>
-        </activation>
-        <!-- not including license-maven-plugin is sufficent to expose default license -->
-      </profile>
-      <!-- jacoco coverage profile.  This will insert -jagent -->
-      <profile>
-        <id>coverage</id>
-        <activation>
-          <property>
-            <name>tests.coverage</name>
-            <value>true</value>
-          </property>
-        </activation>
-        <dependencies>
-          <dependency>
-            <!--  must be on the classpath  -->
-            <groupId>org.jacoco</groupId>
-            <artifactId>org.jacoco.agent</artifactId>
-            <classifier>runtime</classifier>
-            <version>0.6.4.201312101107</version>
-            <scope>test</scope>
-          </dependency>
-        </dependencies>
-        <build>
-          <plugins>
-            <plugin>
-              <groupId>org.jacoco</groupId>
-              <artifactId>jacoco-maven-plugin</artifactId>
-              <version>0.6.4.201312101107</version>
-              <executions>
-                <execution>
-                  <id>default-prepare-agent</id>
-                  <goals>
-                    <goal>prepare-agent</goal>
-                  </goals>
-                </execution>
-                <execution>
-                  <id>default-report</id>
-                  <phase>prepare-package</phase>
-                  <goals>
-                    <goal>report</goal>
-                  </goals>
-                </execution>
-                <execution>
-                  <id>default-check</id>
-                  <goals>
-                    <goal>check</goal>
-                  </goals>
-                </execution>
-              </executions>
-              <configuration>
-                <excludes>
-                  <exclude>jsr166e/**</exclude>
-                  <exclude>org/apache/lucene/**</exclude>
-                </excludes>
-              </configuration>
-            </plugin>
-          </plugins>
-        </build>
-      </profile>
-      <profile>
-        <id>static</id>
-        <activation>
-          <property>
-            <name>tests.static</name>
-            <value>true</value>
-          </property>
-        </activation>
-        <build>
-          <plugins>
-            <plugin>
-              <groupId>org.codehaus.mojo</groupId>
-              <artifactId>findbugs-maven-plugin</artifactId>
-              <version>2.5.3</version>
-            </plugin>
-          </plugins>
-        </build>
-        <reporting>
-          <plugins>
-            <plugin>
-              <groupId>org.apache.maven.plugins</groupId>
-              <artifactId>maven-jxr-plugin</artifactId>
-              <version>2.3</version>
-            </plugin>
-            <plugin>
-              <groupId>org.apache.maven.plugins</groupId>
-              <artifactId>maven-pmd-plugin</artifactId>
-              <version>3.0.1</version>
-              <configuration>
-                <rulesets>
-                  <ruleset>${basedir}/dev-tools/pmd/custom.xml</ruleset>
-                </rulesets>
-                <targetJdk>1.7</targetJdk>
-                <excludes>
-                  <exclude>**/jsr166e/**</exclude>
-                  <exclude>**/org/apache/lucene/**</exclude>
-                  <exclude>**/org/apache/elasticsearch/common/Base64.java</exclude>
-                </excludes>
-              </configuration>
-            </plugin>
-            <plugin>
-              <groupId>org.codehaus.mojo</groupId>
-              <artifactId>findbugs-maven-plugin</artifactId>
-              <version>2.5.3</version>
-              <configuration>
-                <xmlOutput>true</xmlOutput>
-                <xmlOutputDirectory>target/site</xmlOutputDirectory>
-                <fork>true</fork>
-                <maxHeap>2048</maxHeap>
-                <timeout>1800000</timeout>
-                <onlyAnalyze>org.elasticsearch.-</onlyAnalyze>
-              </configuration>
-            </plugin>
-            <plugin>
-              <groupId>org.apache.maven.plugins</groupId>
-              <artifactId>maven-project-info-reports-plugin</artifactId>
-              <version>2.7</version>
-              <reportSets>
-                <reportSet>
-                  <reports>
-                    <report>index</report>
-                  </reports>
-                </reportSet>
-              </reportSets>
-            </plugin>
-          </plugins>
-        </reporting>
-      </profile>
+        <!-- default profile, with randomization setting kicks in -->
+        <profile>
+            <id>default</id>
+            <activation>
+                <activeByDefault>true</activeByDefault>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>com.carrotsearch.randomizedtesting</groupId>
+                        <artifactId>junit4-maven-plugin</artifactId>
+                        <configuration>
+                            <argLine>${tests.jvm.argline}</argLine>
+                        </configuration>
+                    </plugin>
+                    <plugin>
+                        <groupId>com.mycila</groupId>
+                        <artifactId>license-maven-plugin</artifactId>
+                        <version>2.5</version>
+                        <configuration>
+                            <header>dev-tools/elasticsearch_license_header.txt</header>
+                            <headerDefinitions>
+                                <headerDefinition>dev-tools/license_header_definition.xml</headerDefinition>
+                            </headerDefinitions>
+                            <includes>
+                                <include>src/main/java/org/elasticsearch/**/*.java</include>
+                                <include>src/test/java/org/elasticsearch/**/*.java</include>
+                            </includes>
+                            <excludes>
+                                <exclude>src/main/java/org/elasticsearch/common/inject/**</exclude>
+                                <!-- Guice -->
+                                <exclude>src/main/java/org/elasticsearch/common/geo/GeoHashUtils.java</exclude>
+                                <exclude>src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java</exclude>
+                                <exclude>src/main/java/org/elasticsearch/common/lucene/search/XFilteredQuery.java</exclude>
+                                <exclude>src/main/java/org/apache/lucene/queryparser/XSimpleQueryParser.java</exclude>
+                                <exclude>src/main/java/org/apache/lucene/**/X*.java</exclude>
+                                <!-- t-digest -->
+                                <exclude>src/main/java/org/elasticsearch/search/aggregations/metrics/percentiles/tdigest/TDigestState.java
+                                </exclude>
+                                <exclude>src/test/java/org/elasticsearch/search/aggregations/metrics/GroupTree.java</exclude>
+                            </excludes>
+                        </configuration>
+                        <executions>
+                            <execution>
+                                <phase>compile</phase>
+                                <goals>
+                                    <goal>check</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+        <!-- profile for development that doesn't check forbidden-apis, no-commit validation or license headers; run with mvn -Pdev -->
+        <profile>
+            <id>dev</id>
+            <properties>
+                <validate.skip>true</validate.skip>
+            </properties>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>de.thetaphi</groupId>
+                        <artifactId>forbiddenapis</artifactId>
+                        <version>1.5.1</version>
+                        <executions>
+                            <execution>
+                                <id>check-forbidden-apis</id>
+                                <phase>none</phase>
+                            </execution>
+                            <execution>
+                                <id>check-forbidden-test-apis</id>
+                                <phase>none</phase>
+                            </execution>
+                        </executions>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+        <!-- license profile, to generate third party license file -->
+        <profile>
+            <id>license</id>
+            <activation>
+                <property>
+                    <name>license.generation</name>
+                    <value>true</value>
+                </property>
+            </activation>
+            <!-- not including license-maven-plugin is sufficient to expose default license -->
+        </profile>
+        <!-- jacoco coverage profile.  This will insert -javaagent -->
+        <profile>
+            <id>coverage</id>
+            <activation>
+                <property>
+                    <name>tests.coverage</name>
+                    <value>true</value>
+                </property>
+            </activation>
+            <dependencies>
+                <dependency>
+                    <!--  must be on the classpath  -->
+                    <groupId>org.jacoco</groupId>
+                    <artifactId>org.jacoco.agent</artifactId>
+                    <classifier>runtime</classifier>
+                    <version>0.6.4.201312101107</version>
+                    <scope>test</scope>
+                </dependency>
+            </dependencies>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.jacoco</groupId>
+                        <artifactId>jacoco-maven-plugin</artifactId>
+                        <version>0.6.4.201312101107</version>
+                        <executions>
+                            <execution>
+                                <id>default-prepare-agent</id>
+                                <goals>
+                                    <goal>prepare-agent</goal>
+                                </goals>
+                            </execution>
+                            <execution>
+                                <id>default-report</id>
+                                <phase>prepare-package</phase>
+                                <goals>
+                                    <goal>report</goal>
+                                </goals>
+                            </execution>
+                            <execution>
+                                <id>default-check</id>
+                                <goals>
+                                    <goal>check</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                        <configuration>
+                            <excludes>
+                                <exclude>jsr166e/**</exclude>
+                                <exclude>org/apache/lucene/**</exclude>
+                            </excludes>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+        <profile>
+            <id>static</id>
+            <activation>
+                <property>
+                    <name>tests.static</name>
+                    <value>true</value>
+                </property>
+            </activation>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.codehaus.mojo</groupId>
+                        <artifactId>findbugs-maven-plugin</artifactId>
+                        <version>2.5.3</version>
+                    </plugin>
+                </plugins>
+            </build>
+            <reporting>
+                <plugins>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-jxr-plugin</artifactId>
+                        <version>2.3</version>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-pmd-plugin</artifactId>
+                        <version>3.0.1</version>
+                        <configuration>
+                            <rulesets>
+                                <ruleset>${basedir}/dev-tools/pmd/custom.xml</ruleset>
+                            </rulesets>
+                            <targetJdk>1.7</targetJdk>
+                            <excludes>
+                                <exclude>**/jsr166e/**</exclude>
+                                <exclude>**/org/apache/lucene/**</exclude>
+                                <exclude>**/org/apache/elasticsearch/common/Base64.java</exclude>
+                            </excludes>
+                        </configuration>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.codehaus.mojo</groupId>
+                        <artifactId>findbugs-maven-plugin</artifactId>
+                        <version>2.5.3</version>
+                        <configuration>
+                            <xmlOutput>true</xmlOutput>
+                            <xmlOutputDirectory>target/site</xmlOutputDirectory>
+                            <fork>true</fork>
+                            <maxHeap>2048</maxHeap>
+                            <timeout>1800000</timeout>
+                            <onlyAnalyze>org.elasticsearch.-</onlyAnalyze>
+                        </configuration>
+                    </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-project-info-reports-plugin</artifactId>
+                        <version>2.7</version>
+                        <reportSets>
+                            <reportSet>
+                                <reports>
+                                    <report>index</report>
+                                </reports>
+                            </reportSet>
+                        </reportSets>
+                    </plugin>
+                </plugins>
+            </reporting>
+        </profile>
     </profiles>
 </project>
diff --git a/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java b/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java
index fa77ae88478..edcf8334640 100644
--- a/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java
+++ b/src/main/java/org/elasticsearch/action/admin/cluster/settings/TransportClusterUpdateSettingsAction.java
@@ -137,6 +137,12 @@ public class TransportClusterUpdateSettingsAction extends TransportMasterNodeOpe
                         return new ClusterUpdateSettingsResponse(updateSettingsAcked && acknowledged, transientUpdates.build(), persistentUpdates.build());
                     }
 
+                    @Override
+                    public void onNoLongerMaster(String source) { // invoked instead of the reroute when this node is no longer master
+                        logger.debug("failed to perform reroute after cluster settings were updated - current node is no longer a master");
+                        listener.onResponse(new ClusterUpdateSettingsResponse(updateSettingsAcked, transientUpdates.build(), persistentUpdates.build())); // still respond; ack reflects only the settings update since no reroute ack exists
+                    }
+
                     @Override
                     public void onFailure(String source, Throwable t) {
                         //if the reroute fails we only log
diff --git a/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java b/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java
index c0e9a65de34..2e54d5cf181 100644
--- a/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java
+++ b/src/main/java/org/elasticsearch/action/admin/indices/recovery/TransportRecoveryAction.java
@@ -173,12 +173,12 @@ public class TransportRecoveryAction extends
 
     @Override
     protected ClusterBlockException checkGlobalBlock(ClusterState state, RecoveryRequest request) {
-        return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA);
+        return state.blocks().globalBlockedException(ClusterBlockLevel.READ);
     }
 
     @Override
     protected ClusterBlockException checkRequestBlock(ClusterState state, RecoveryRequest request, String[] concreteIndices) {
-        return state.blocks().indicesBlockedException(ClusterBlockLevel.METADATA, concreteIndices);
+        return state.blocks().indicesBlockedException(ClusterBlockLevel.READ, concreteIndices);
     }
 
     static class ShardRecoveryRequest extends BroadcastShardOperationRequest {
diff --git a/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java b/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java
index 0ebfd47593e..5868aa12b5a 100644
--- a/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java
+++ b/src/main/java/org/elasticsearch/action/bench/BenchmarkService.java
@@ -66,11 +66,11 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
     /**
      * Constructs a service component for running benchmarks
      *
-     * @param settings          Settings
-     * @param clusterService    Cluster service
-     * @param threadPool        Thread pool
-     * @param client            Client
-     * @param transportService  Transport service
+     * @param settings         Settings
+     * @param clusterService   Cluster service
+     * @param threadPool       Thread pool
+     * @param client           Client
+     * @param transportService Transport service
      */
     @Inject
     public BenchmarkService(Settings settings, ClusterService clusterService, ThreadPool threadPool,
@@ -86,19 +86,22 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
     }
 
     @Override
-    protected void doStart() throws ElasticsearchException { }
+    protected void doStart() throws ElasticsearchException {
+    }
 
     @Override
-    protected void doStop() throws ElasticsearchException { }
+    protected void doStop() throws ElasticsearchException {
+    }
 
     @Override
-    protected void doClose() throws ElasticsearchException { }
+    protected void doClose() throws ElasticsearchException {
+    }
 
     /**
      * Lists actively running benchmarks on the cluster
      *
-     * @param request   Status request
-     * @param listener  Response listener
+     * @param request  Status request
+     * @param listener Response listener
      */
     public void listBenchmarks(final BenchmarkStatusRequest request, final ActionListener<BenchmarkStatusResponse> listener) {
 
@@ -171,8 +174,8 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
     /**
      * Executes benchmarks on the cluster
      *
-     * @param request   Benchmark request
-     * @param listener  Response listener
+     * @param request  Benchmark request
+     * @param listener Response listener
      */
     public void startBenchmark(final BenchmarkRequest request, final ActionListener<BenchmarkResponse> listener) {
 
@@ -228,7 +231,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
                 listener.onFailure(t);
             }
         }, (benchmarkResponse.state() != BenchmarkResponse.State.ABORTED) &&
-           (benchmarkResponse.state() != BenchmarkResponse.State.FAILED)));
+                (benchmarkResponse.state() != BenchmarkResponse.State.FAILED)));
     }
 
     private final boolean isBenchmarkNode(DiscoveryNode node) {
@@ -403,6 +406,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
         }
 
         public abstract T newInstance();
+
         protected abstract void sendResponse();
 
         @Override
@@ -593,7 +597,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
 
             if (bmd != null) {
                 for (BenchmarkMetaData.Entry entry : bmd.entries()) {
-                    if (request.benchmarkName().equals(entry.benchmarkId())){
+                    if (request.benchmarkName().equals(entry.benchmarkId())) {
                         if (entry.state() != BenchmarkMetaData.State.SUCCESS && entry.state() != BenchmarkMetaData.State.FAILED) {
                             throw new ElasticsearchException("A benchmark with ID [" + request.benchmarkName() + "] is already running in state [" + entry.state() + "]");
                         }
@@ -648,7 +652,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
         @Override
         protected BenchmarkMetaData.Entry process(BenchmarkMetaData.Entry entry) {
             BenchmarkMetaData.State state = entry.state();
-            assert state == BenchmarkMetaData.State.STARTED || state == BenchmarkMetaData.State.ABORTED :  "Expected state: STARTED or ABORTED but was: " + entry.state();
+            assert state == BenchmarkMetaData.State.STARTED || state == BenchmarkMetaData.State.ABORTED : "Expected state: STARTED or ABORTED but was: " + entry.state();
             if (success) {
                 return new BenchmarkMetaData.Entry(entry, BenchmarkMetaData.State.SUCCESS);
             } else {
@@ -661,7 +665,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
         private final String[] patterns;
 
         public AbortBenchmarkTask(String[] patterns, BenchmarkStateListener listener) {
-            super("abort_benchmark", null , listener);
+            super("abort_benchmark", null, listener);
             this.patterns = patterns;
         }
 
@@ -675,7 +679,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
         }
     }
 
-    public abstract class UpdateBenchmarkStateTask implements ProcessedClusterStateUpdateTask {
+    public abstract class UpdateBenchmarkStateTask extends ProcessedClusterStateUpdateTask {
 
         private final String reason;
         protected final String benchmarkId;
@@ -702,7 +706,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
                 ImmutableList.Builder<BenchmarkMetaData.Entry> builder = new ImmutableList.Builder<BenchmarkMetaData.Entry>();
                 for (BenchmarkMetaData.Entry e : bmd.entries()) {
                     if (benchmarkId == null || match(e)) {
-                        e = process(e) ;
+                        e = process(e);
                         instances.add(e);
                     }
                     // Don't keep finished benchmarks around in cluster state
@@ -741,7 +745,7 @@ public class BenchmarkService extends AbstractLifecycleComponent<BenchmarkServic
         }
     }
 
-    public abstract class BenchmarkStateChangeAction<R extends MasterNodeOperationRequest> implements TimeoutClusterStateUpdateTask {
+    public abstract class BenchmarkStateChangeAction<R extends MasterNodeOperationRequest> extends TimeoutClusterStateUpdateTask {
         protected final R request;
 
         public BenchmarkStateChangeAction(R request) {
diff --git a/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java
index 7cdee753873..087bd1c6ad6 100644
--- a/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java
+++ b/src/main/java/org/elasticsearch/cluster/AckedClusterStateUpdateTask.java
@@ -28,7 +28,7 @@ import org.elasticsearch.common.unit.TimeValue;
  * An extension interface to {@link ClusterStateUpdateTask} that allows to be notified when
  * all the nodes have acknowledged a cluster state update request
  */
-public abstract class AckedClusterStateUpdateTask<Response> implements TimeoutClusterStateUpdateTask {
+public abstract class AckedClusterStateUpdateTask<Response> extends TimeoutClusterStateUpdateTask {
 
     private final ActionListener<Response> listener;
     private final AckedRequest request;
@@ -40,6 +40,7 @@ public abstract class AckedClusterStateUpdateTask<Response> implements TimeoutCl
 
     /**
      * Called to determine which nodes the acknowledgement is expected from
+     *
      * @param discoveryNode a node
      * @return true if the node is expected to send ack back, false otherwise
      */
@@ -50,6 +51,7 @@ public abstract class AckedClusterStateUpdateTask<Response> implements TimeoutCl
     /**
      * Called once all the nodes have acknowledged the cluster state update request. Must be
      * very lightweight execution, since it gets executed on the cluster service thread.
+     *
      * @param t optional error that might have been thrown
      */
     public void onAllNodesAcked(@Nullable Throwable t) {
diff --git a/src/main/java/org/elasticsearch/cluster/ClusterService.java b/src/main/java/org/elasticsearch/cluster/ClusterService.java
index 6204599f57d..080fce84a36 100644
--- a/src/main/java/org/elasticsearch/cluster/ClusterService.java
+++ b/src/main/java/org/elasticsearch/cluster/ClusterService.java
@@ -110,4 +110,5 @@ public interface ClusterService extends LifecycleComponent<ClusterService> {
      * Returns the tasks that are pending.
      */
     List<PendingClusterTask> pendingTasks();
+
 }
diff --git a/src/main/java/org/elasticsearch/cluster/ClusterState.java b/src/main/java/org/elasticsearch/cluster/ClusterState.java
index ecb041a233e..d208d6a20a7 100644
--- a/src/main/java/org/elasticsearch/cluster/ClusterState.java
+++ b/src/main/java/org/elasticsearch/cluster/ClusterState.java
@@ -115,6 +115,8 @@ public class ClusterState implements ToXContent {
     }
 
 
+    public static final long UNKNOWN_VERSION = -1;
+
     private final long version;
 
     private final RoutingTable routingTable;
diff --git a/src/main/java/org/elasticsearch/cluster/ClusterStateNonMasterUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ClusterStateNonMasterUpdateTask.java
new file mode 100644
index 00000000000..48afbb8f1fe
--- /dev/null
+++ b/src/main/java/org/elasticsearch/cluster/ClusterStateNonMasterUpdateTask.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.cluster;
+
+/**
+ * A base class for cluster state update tasks that should be executed
+ * even if the current node is not a master.
+ */
+public abstract class ClusterStateNonMasterUpdateTask extends ClusterStateUpdateTask {
+
+    @Override
+    public boolean runOnlyOnMaster() {
+        return false;
+    }
+}
diff --git a/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java
index 490a556ab12..921b6d149ee 100644
--- a/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java
+++ b/src/main/java/org/elasticsearch/cluster/ClusterStateUpdateTask.java
@@ -19,19 +19,37 @@
 
 package org.elasticsearch.cluster;
 
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
+
 /**
  * A task that can update the cluster state.
  */
-public interface ClusterStateUpdateTask {
+abstract public class ClusterStateUpdateTask {
 
     /**
      * Update the cluster state based on the current state. Return the *same instance* if no state
      * should be changed.
      */
-    ClusterState execute(ClusterState currentState) throws Exception;
+    abstract public ClusterState execute(ClusterState currentState) throws Exception;
 
     /**
      * A callback called when execute fails.
      */
-    void onFailure(String source, Throwable t);
+    abstract public void onFailure(String source, @Nullable Throwable t);
+
+
+    /**
+     * Indicates whether this task should only run if the current node is master.
+     */
+    public boolean runOnlyOnMaster() {
+        return true;
+    }
+
+    /**
+     * Called when the task was rejected because the local node is no longer master.
+     */
+    public void onNoLongerMaster(String source) {
+        onFailure(source, new EsRejectedExecutionException("no longer master. source: [" + source + "]"));
+    }
 }
diff --git a/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateNonMasterUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateNonMasterUpdateTask.java
new file mode 100644
index 00000000000..4af05b43581
--- /dev/null
+++ b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateNonMasterUpdateTask.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.cluster;
+
+/**
+ * A combination of {@link org.elasticsearch.cluster.ProcessedClusterStateUpdateTask} and
+ * {@link org.elasticsearch.cluster.ClusterStateNonMasterUpdateTask} to allow easy creation of anonymous classes
+ */
+abstract public class ProcessedClusterStateNonMasterUpdateTask extends ProcessedClusterStateUpdateTask {
+
+    @Override
+    public boolean runOnlyOnMaster() {
+        return false;
+    }
+}
diff --git a/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java
index 72074965f95..2d703ed2621 100644
--- a/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java
+++ b/src/main/java/org/elasticsearch/cluster/ProcessedClusterStateUpdateTask.java
@@ -23,11 +23,11 @@ package org.elasticsearch.cluster;
  * An extension interface to {@link ClusterStateUpdateTask} that allows to be notified when
  * the cluster state update has been processed.
  */
-public interface ProcessedClusterStateUpdateTask extends ClusterStateUpdateTask {
+public abstract class ProcessedClusterStateUpdateTask extends ClusterStateUpdateTask {
 
     /**
      * Called when the result of the {@link #execute(ClusterState)} have been processed
      * properly by all listeners.
      */
-    void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState);
+    public abstract void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState);
 }
diff --git a/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java b/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java
index 1083e1ddcbe..1ae767c6560 100644
--- a/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java
+++ b/src/main/java/org/elasticsearch/cluster/TimeoutClusterStateUpdateTask.java
@@ -25,11 +25,11 @@ import org.elasticsearch.common.unit.TimeValue;
  * An extension interface to {@link org.elasticsearch.cluster.ClusterStateUpdateTask} that allows to associate
  * a timeout.
  */
-public interface TimeoutClusterStateUpdateTask extends ProcessedClusterStateUpdateTask {
+abstract public class TimeoutClusterStateUpdateTask extends ProcessedClusterStateUpdateTask {
 
     /**
      * If the cluster state update task wasn't processed by the provided timeout, call
      * {@link #onFailure(String, Throwable)}
      */
-    TimeValue timeout();
+    abstract public TimeValue timeout();
 }
diff --git a/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java b/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java
index 957bd406263..bb7d332de4f 100644
--- a/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java
+++ b/src/main/java/org/elasticsearch/cluster/block/ClusterBlocks.java
@@ -108,6 +108,19 @@ public class ClusterBlocks {
         return global.contains(block);
     }
 
+    public boolean hasGlobalBlock(int blockId) {
+        for (ClusterBlock clusterBlock : global) {
+            if (clusterBlock.id() == blockId) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    public boolean hasGlobalBlock(ClusterBlockLevel level) {
+        return global(level).size() > 0;
+    }
+
     /**
      * Is there a global block with the provided status?
      */
diff --git a/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java b/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java
index b33804de564..555b8b3ef1b 100644
--- a/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java
+++ b/src/main/java/org/elasticsearch/cluster/routing/RoutingService.java
@@ -149,10 +149,15 @@ public class RoutingService extends AbstractLifecycleComponent<RoutingService> i
                     return ClusterState.builder(currentState).routingResult(routingResult).build();
                 }
 
+                @Override
+                public void onNoLongerMaster(String source) {
+                    // no biggie
+                }
+
                 @Override
                 public void onFailure(String source, Throwable t) {
-                    ClusterState state = clusterService.state();
-                    logger.error("unexpected failure during [{}], current state:\n{}", t, source, state.prettyPrint());
+                        ClusterState state = clusterService.state();
+                        logger.error("unexpected failure during [{}], current state:\n{}", t, source, state.prettyPrint());
                 }
             });
             routingTableDirty = false;
diff --git a/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java b/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java
index fad94ba1944..c5fe004f8b9 100644
--- a/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java
+++ b/src/main/java/org/elasticsearch/cluster/service/InternalClusterService.java
@@ -84,7 +84,7 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
 
     private volatile ClusterState clusterState;
 
-    private final ClusterBlocks.Builder initialBlocks = ClusterBlocks.builder().addGlobalBlock(Discovery.NO_MASTER_BLOCK);
+    private final ClusterBlocks.Builder initialBlocks;
 
     private volatile ScheduledFuture reconnectToNodes;
 
@@ -104,6 +104,8 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
         this.reconnectInterval = componentSettings.getAsTime("reconnect_interval", TimeValue.timeValueSeconds(10));
 
         localNodeMasterListeners = new LocalNodeMasterListeners(threadPool);
+
+        initialBlocks = ClusterBlocks.builder().addGlobalBlock(discoveryService.getNoMasterBlock());
     }
 
     public NodeSettingsService settingsService() {
@@ -134,7 +136,7 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
         discoveryService.addLifecycleListener(new LifecycleListener() {
             @Override
             public void afterStart() {
-                submitStateUpdateTask("update local node", Priority.IMMEDIATE, new ClusterStateUpdateTask() {
+                submitStateUpdateTask("update local node", Priority.IMMEDIATE, new ClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) throws Exception {
                         return ClusterState.builder(currentState)
@@ -144,7 +146,7 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
 
                     @Override
                     public void onFailure(String source, Throwable t) {
-                        logger.warn("failed ot update local node", t);
+                        logger.warn("failed to update local node", t);
                     }
                 });
             }
@@ -323,6 +325,11 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
             }
             logger.debug("processing [{}]: execute", source);
             ClusterState previousClusterState = clusterState;
+            if (!previousClusterState.nodes().localNodeMaster() && updateTask.runOnlyOnMaster()) {
+                logger.debug("failing [{}]: local node is no longer master", source);
+                updateTask.onNoLongerMaster(source);
+                return;
+            }
             ClusterState newClusterState;
             try {
                 newClusterState = updateTask.execute(previousClusterState);
@@ -379,20 +386,6 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
                             }
                         }
                     }
-                } else {
-                    if (previousClusterState.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK) && !newClusterState.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) {
-                        // force an update, its a fresh update from the master as we transition from a start of not having a master to having one
-                        // have a fresh instances of routing and metadata to remove the chance that version might be the same
-                        Builder builder = ClusterState.builder(newClusterState);
-                        builder.routingTable(RoutingTable.builder(newClusterState.routingTable()));
-                        builder.metaData(MetaData.builder(newClusterState.metaData()));
-                        newClusterState = builder.build();
-                        logger.debug("got first state from fresh master [{}]", newClusterState.nodes().masterNodeId());
-                    } else if (newClusterState.version() < previousClusterState.version()) {
-                        // we got a cluster state with older version, when we are *not* the master, let it in since it might be valid
-                        // we check on version where applicable, like at ZenDiscovery#handleNewClusterStateFromMaster
-                        logger.debug("got smaller cluster state when not master [" + newClusterState.version() + "<" + previousClusterState.version() + "] from source [" + source + "]");
-                    }
                 }
 
                 newClusterState.status(ClusterState.ClusterStateStatus.BEING_APPLIED);
@@ -720,5 +713,4 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
             }
         }
     }
-
 }
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/cluster/settings/ClusterDynamicSettingsModule.java b/src/main/java/org/elasticsearch/cluster/settings/ClusterDynamicSettingsModule.java
index 7a022982ab7..de9f66b2e75 100644
--- a/src/main/java/org/elasticsearch/cluster/settings/ClusterDynamicSettingsModule.java
+++ b/src/main/java/org/elasticsearch/cluster/settings/ClusterDynamicSettingsModule.java
@@ -27,6 +27,7 @@ import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllo
 import org.elasticsearch.cluster.routing.allocation.decider.*;
 import org.elasticsearch.common.inject.AbstractModule;
 import org.elasticsearch.discovery.DiscoverySettings;
+import org.elasticsearch.discovery.zen.ZenDiscovery;
 import org.elasticsearch.discovery.zen.elect.ElectMasterService;
 import org.elasticsearch.indices.breaker.HierarchyCircuitBreakerService;
 import org.elasticsearch.indices.cache.filter.IndicesFilterCache;
@@ -57,6 +58,8 @@ public class ClusterDynamicSettingsModule extends AbstractModule {
         clusterDynamicSettings.addDynamicSetting(DisableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_DISABLE_ALLOCATION);
         clusterDynamicSettings.addDynamicSetting(DisableAllocationDecider.CLUSTER_ROUTING_ALLOCATION_DISABLE_REPLICA_ALLOCATION);
         clusterDynamicSettings.addDynamicSetting(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES, Validator.INTEGER);
+        clusterDynamicSettings.addDynamicSetting(ZenDiscovery.SETTING_REJOIN_ON_MASTER_GONE, Validator.BOOLEAN);
+        clusterDynamicSettings.addDynamicSetting(DiscoverySettings.NO_MASTER_BLOCK);
         clusterDynamicSettings.addDynamicSetting(FilterAllocationDecider.CLUSTER_ROUTING_INCLUDE_GROUP + "*");
         clusterDynamicSettings.addDynamicSetting(FilterAllocationDecider.CLUSTER_ROUTING_EXCLUDE_GROUP + "*");
         clusterDynamicSettings.addDynamicSetting(FilterAllocationDecider.CLUSTER_ROUTING_REQUIRE_GROUP + "*");
diff --git a/src/main/java/org/elasticsearch/discovery/Discovery.java b/src/main/java/org/elasticsearch/discovery/Discovery.java
index b66e90c8c1c..dfd51e6348f 100644
--- a/src/main/java/org/elasticsearch/discovery/Discovery.java
+++ b/src/main/java/org/elasticsearch/discovery/Discovery.java
@@ -20,14 +20,11 @@
 package org.elasticsearch.discovery;
 
 import org.elasticsearch.cluster.ClusterState;
-import org.elasticsearch.cluster.block.ClusterBlock;
-import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.component.LifecycleComponent;
 import org.elasticsearch.node.service.NodeService;
-import org.elasticsearch.rest.RestStatus;
 
 /**
  * A pluggable module allowing to implement discovery of other nodes, publishing of the cluster
@@ -36,8 +33,6 @@ import org.elasticsearch.rest.RestStatus;
  */
 public interface Discovery extends LifecycleComponent<Discovery> {
 
-    final ClusterBlock NO_MASTER_BLOCK = new ClusterBlock(2, "no master", true, true, RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL);
-
     DiscoveryNode localNode();
 
     void addListener(InitialStateDiscoveryListener listener);
diff --git a/src/main/java/org/elasticsearch/discovery/DiscoveryService.java b/src/main/java/org/elasticsearch/discovery/DiscoveryService.java
index 0108db12a19..f73f2bbb593 100644
--- a/src/main/java/org/elasticsearch/discovery/DiscoveryService.java
+++ b/src/main/java/org/elasticsearch/discovery/DiscoveryService.java
@@ -22,6 +22,7 @@ package org.elasticsearch.discovery;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.ElasticsearchTimeoutException;
 import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.block.ClusterBlock;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.component.AbstractLifecycleComponent;
@@ -38,6 +39,8 @@ import java.util.concurrent.TimeUnit;
  */
 public class DiscoveryService extends AbstractLifecycleComponent<DiscoveryService> {
 
+    public static final String SETTING_INITIAL_STATE_TIMEOUT = "discovery.initial_state_timeout";
+
     private static class InitialStateListener implements InitialStateDiscoveryListener {
 
         private final CountDownLatch latch = new CountDownLatch(1);
@@ -60,12 +63,18 @@ public class DiscoveryService extends AbstractLifecycleComponent<DiscoveryServic
     private final TimeValue initialStateTimeout;
     private final Discovery discovery;
     private InitialStateListener initialStateListener;
+    private final DiscoverySettings discoverySettings;
 
     @Inject
-    public DiscoveryService(Settings settings, Discovery discovery) {
+    public DiscoveryService(Settings settings, DiscoverySettings discoverySettings, Discovery discovery) {
         super(settings);
+        this.discoverySettings = discoverySettings;
         this.discovery = discovery;
-        this.initialStateTimeout = componentSettings.getAsTime("initial_state_timeout", TimeValue.timeValueSeconds(30));
+        this.initialStateTimeout = settings.getAsTime(SETTING_INITIAL_STATE_TIMEOUT, TimeValue.timeValueSeconds(30));
+    }
+
+    public ClusterBlock getNoMasterBlock() {
+        return discoverySettings.getNoMasterBlock();
     }
 
     @Override
diff --git a/src/main/java/org/elasticsearch/discovery/DiscoverySettings.java b/src/main/java/org/elasticsearch/discovery/DiscoverySettings.java
index 5a56e2d3a1b..b8d48b16129 100644
--- a/src/main/java/org/elasticsearch/discovery/DiscoverySettings.java
+++ b/src/main/java/org/elasticsearch/discovery/DiscoverySettings.java
@@ -19,11 +19,17 @@
 
 package org.elasticsearch.discovery;
 
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.cluster.block.ClusterBlock;
+import org.elasticsearch.cluster.block.ClusterBlockLevel;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.node.settings.NodeSettingsService;
+import org.elasticsearch.rest.RestStatus;
+
+import java.util.EnumSet;
 
 /**
  * Exposes common discovery settings that may be supported by all the different discovery implementations
@@ -31,15 +37,24 @@ import org.elasticsearch.node.settings.NodeSettingsService;
 public class DiscoverySettings extends AbstractComponent {
 
     public static final String PUBLISH_TIMEOUT = "discovery.zen.publish_timeout";
+    public static final String NO_MASTER_BLOCK = "discovery.zen.no_master_block";
 
     public static final TimeValue DEFAULT_PUBLISH_TIMEOUT = TimeValue.timeValueSeconds(30);
+    public static final String DEFAULT_NO_MASTER_BLOCK = "write";
+    public final static int NO_MASTER_BLOCK_ID = 2;
 
+    public final static ClusterBlock NO_MASTER_BLOCK_ALL = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, true, RestStatus.SERVICE_UNAVAILABLE, ClusterBlockLevel.ALL);
+    public final static ClusterBlock NO_MASTER_BLOCK_WRITES = new ClusterBlock(NO_MASTER_BLOCK_ID, "no master", true, false, RestStatus.SERVICE_UNAVAILABLE, EnumSet.of(ClusterBlockLevel.WRITE, ClusterBlockLevel.METADATA));
+
+    private volatile ClusterBlock noMasterBlock;
     private volatile TimeValue publishTimeout = DEFAULT_PUBLISH_TIMEOUT;
 
     @Inject
     public DiscoverySettings(Settings settings, NodeSettingsService nodeSettingsService) {
         super(settings);
         nodeSettingsService.addListener(new ApplySettings());
+        this.noMasterBlock = parseNoMasterBlock(settings.get(NO_MASTER_BLOCK, DEFAULT_NO_MASTER_BLOCK));
+        this.publishTimeout = settings.getAsTime(PUBLISH_TIMEOUT, publishTimeout);
     }
 
     /**
@@ -49,6 +64,10 @@ public class DiscoverySettings extends AbstractComponent {
         return publishTimeout;
     }
 
+    public ClusterBlock getNoMasterBlock() {
+        return noMasterBlock;
+    }
+
     private class ApplySettings implements NodeSettingsService.Listener {
         @Override
         public void onRefreshSettings(Settings settings) {
@@ -59,6 +78,24 @@ public class DiscoverySettings extends AbstractComponent {
                     publishTimeout = newPublishTimeout;
                 }
             }
+            String newNoMasterBlockValue = settings.get(NO_MASTER_BLOCK);
+            if (newNoMasterBlockValue != null) {
+                ClusterBlock newNoMasterBlock = parseNoMasterBlock(newNoMasterBlockValue);
+                if (newNoMasterBlock != noMasterBlock) {
+                    noMasterBlock = newNoMasterBlock;
+                }
+            }
+        }
+    }
+
+    private ClusterBlock parseNoMasterBlock(String value) {
+        switch (value) {
+            case "all":
+                return NO_MASTER_BLOCK_ALL;
+            case "write":
+                return NO_MASTER_BLOCK_WRITES;
+            default:
+                throw new ElasticsearchIllegalArgumentException("invalid master block [" + value + "]");
         }
     }
 }
diff --git a/src/main/java/org/elasticsearch/discovery/local/LocalDiscovery.java b/src/main/java/org/elasticsearch/discovery/local/LocalDiscovery.java
index 31ae57a7cf8..f948b4d5c5d 100644
--- a/src/main/java/org/elasticsearch/discovery/local/LocalDiscovery.java
+++ b/src/main/java/org/elasticsearch/discovery/local/LocalDiscovery.java
@@ -58,6 +58,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
 
     private final TransportService transportService;
     private final ClusterService clusterService;
+    private final DiscoveryService discoveryService;
     private final DiscoveryNodeService discoveryNodeService;
     private AllocationService allocationService;
     private final ClusterName clusterName;
@@ -77,7 +78,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
 
     @Inject
     public LocalDiscovery(Settings settings, ClusterName clusterName, TransportService transportService, ClusterService clusterService,
-                          DiscoveryNodeService discoveryNodeService, Version version, DiscoverySettings discoverySettings) {
+                          DiscoveryNodeService discoveryNodeService, Version version, DiscoverySettings discoverySettings, DiscoveryService discoveryService) {
         super(settings);
         this.clusterName = clusterName;
         this.clusterService = clusterService;
@@ -85,6 +86,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
         this.discoveryNodeService = discoveryNodeService;
         this.version = version;
         this.discoverySettings = discoverySettings;
+        this.discoveryService = discoveryService;
     }
 
     @Override
@@ -123,7 +125,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
                 // we are the first master (and the master)
                 master = true;
                 final LocalDiscovery master = firstMaster;
-                clusterService.submitStateUpdateTask("local-disco-initial_connect(master)", new ProcessedClusterStateUpdateTask() {
+                clusterService.submitStateUpdateTask("local-disco-initial_connect(master)", new ProcessedClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) {
                         DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
@@ -132,7 +134,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
                         }
                         nodesBuilder.localNodeId(master.localNode().id()).masterNodeId(master.localNode().id());
                         // remove the NO_MASTER block in this case
-                        ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(Discovery.NO_MASTER_BLOCK);
+                        ClusterBlocks.Builder blocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock());
                         return ClusterState.builder(currentState).nodes(nodesBuilder).blocks(blocks).build();
                     }
 
@@ -149,7 +151,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
             } else if (firstMaster != null) {
                 // update as fast as we can the local node state with the new metadata (so we create indices for example)
                 final ClusterState masterState = firstMaster.clusterService.state();
-                clusterService.submitStateUpdateTask("local-disco(detected_master)", new ClusterStateUpdateTask() {
+                clusterService.submitStateUpdateTask("local-disco(detected_master)", new ClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) {
                         // make sure we have the local node id set, we might need it as a result of the new metadata
@@ -165,7 +167,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
 
                 // tell the master to send the fact that we are here
                 final LocalDiscovery master = firstMaster;
-                firstMaster.clusterService.submitStateUpdateTask("local-disco-receive(from node[" + localNode + "])", new ProcessedClusterStateUpdateTask() {
+                firstMaster.clusterService.submitStateUpdateTask("local-disco-receive(from node[" + localNode + "])", new ProcessedClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) {
                         DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder();
@@ -225,7 +227,7 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
                 }
 
                 final LocalDiscovery master = firstMaster;
-                master.clusterService.submitStateUpdateTask("local-disco-update", new ClusterStateUpdateTask() {
+                master.clusterService.submitStateUpdateTask("local-disco-update", new ClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) {
                         DiscoveryNodes newNodes = currentState.nodes().removeDeadMembers(newMembers, master.localNode.id());
@@ -305,13 +307,22 @@ public class LocalDiscovery extends AbstractLifecycleComponent<Discovery> implem
                 nodeSpecificClusterState.status(ClusterState.ClusterStateStatus.RECEIVED);
                 // ignore cluster state messages that do not include "me", not in the game yet...
                 if (nodeSpecificClusterState.nodes().localNode() != null) {
-                    discovery.clusterService.submitStateUpdateTask("local-disco-receive(from master)", new ProcessedClusterStateUpdateTask() {
+                    assert nodeSpecificClusterState.nodes().masterNode() != null : "received a cluster state without a master";
+                    assert !nodeSpecificClusterState.blocks().hasGlobalBlock(discoveryService.getNoMasterBlock()) : "received a cluster state with a master block";
+
+                    discovery.clusterService.submitStateUpdateTask("local-disco-receive(from master)", new ProcessedClusterStateNonMasterUpdateTask() {
                         @Override
                         public ClusterState execute(ClusterState currentState) {
                             if (nodeSpecificClusterState.version() < currentState.version() && Objects.equal(nodeSpecificClusterState.nodes().masterNodeId(), currentState.nodes().masterNodeId())) {
                                 return currentState;
                             }
 
+                            if (currentState.blocks().hasGlobalBlock(discoveryService.getNoMasterBlock())) {
+                                // it's a fresh update from the master as we transition from a state of not having a master to having one
+                                logger.debug("got first state from fresh master [{}]", nodeSpecificClusterState.nodes().masterNodeId());
+                                return nodeSpecificClusterState;
+                            }
+
                             ClusterState.Builder builder = ClusterState.builder(nodeSpecificClusterState);
                             // if the routing table did not change, use the original one
                             if (nodeSpecificClusterState.routingTable().version() == currentState.routingTable().version()) {
diff --git a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java
index b1149cbbf55..d7c8c0ccafc 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java
@@ -22,9 +22,7 @@ package org.elasticsearch.discovery.zen;
 import com.google.common.base.Objects;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
-import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.ElasticsearchIllegalStateException;
-import org.elasticsearch.Version;
+import org.elasticsearch.*;
 import org.elasticsearch.cluster.*;
 import org.elasticsearch.cluster.block.ClusterBlocks;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -32,10 +30,10 @@ import org.elasticsearch.cluster.metadata.MetaData;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodeService;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
-import org.elasticsearch.cluster.routing.RoutingTable;
 import org.elasticsearch.cluster.routing.allocation.AllocationService;
 import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
 import org.elasticsearch.common.Priority;
+import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.component.AbstractLifecycleComponent;
 import org.elasticsearch.common.component.Lifecycle;
 import org.elasticsearch.common.inject.Inject;
@@ -45,6 +43,7 @@ import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
+import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
 import org.elasticsearch.discovery.Discovery;
 import org.elasticsearch.discovery.DiscoveryService;
 import org.elasticsearch.discovery.DiscoverySettings;
@@ -56,19 +55,20 @@ import org.elasticsearch.discovery.zen.membership.MembershipAction;
 import org.elasticsearch.discovery.zen.ping.ZenPing;
 import org.elasticsearch.discovery.zen.ping.ZenPingService;
 import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction;
-import org.elasticsearch.gateway.GatewayService;
 import org.elasticsearch.node.service.NodeService;
 import org.elasticsearch.node.settings.NodeSettingsService;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import static com.google.common.collect.Lists.newArrayList;
 import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
@@ -78,6 +78,16 @@ import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
  */
 public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implements Discovery, DiscoveryNodesProvider {
 
+    public final static String SETTING_REJOIN_ON_MASTER_GONE = "discovery.zen.rejoin_on_master_gone";
+    public final static String SETTING_PING_TIMEOUT = "discovery.zen.ping.timeout";
+    public final static String SETTING_JOIN_TIMEOUT = "discovery.zen.join_timeout";
+    public final static String SETTING_JOIN_RETRY_ATTEMPTS = "discovery.zen.join_retry_attempts";
+    public final static String SETTING_JOIN_RETRY_DELAY = "discovery.zen.join_retry_delay";
+    public final static String SETTING_MAX_PINGS_FROM_ANOTHER_MASTER = "discovery.zen.max_pings_from_another_master";
+    public final static String SETTING_SEND_LEAVE_REQUEST = "discovery.zen.send_leave_request";
+    public final static String SETTING_MASTER_ELECTION_FILTER_CLIENT = "discovery.zen.master_election.filter_client";
+    public final static String SETTING_MASTER_ELECTION_FILTER_DATA = "discovery.zen.master_election.filter_data";
+
     public static final String DISCOVERY_REJOIN_ACTION_NAME = "internal:discovery/zen/rejoin";
 
     private final ThreadPool threadPool;
@@ -86,6 +96,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
     private AllocationService allocationService;
     private final ClusterName clusterName;
     private final DiscoveryNodeService discoveryNodeService;
+    private final DiscoverySettings discoverySettings;
     private final ZenPingService pingService;
     private final MasterFaultDetection masterFD;
     private final NodesFaultDetection nodesFD;
@@ -97,6 +108,14 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
     private final TimeValue pingTimeout;
     private final TimeValue joinTimeout;
 
+    /** how many retry attempts to perform if join request failed with a retriable error */
+    private final int joinRetryAttempts;
+    /** how long to wait before performing another join attempt after a join request failed with a retriable error */
+    private final TimeValue joinRetryDelay;
+
+    /** how many pings from *another* master to tolerate before forcing a rejoin on other or local master */
+    private final int maxPingsFromAnotherMaster;
+
     // a flag that should be used only for testing
     private final boolean sendLeaveRequest;
 
@@ -118,41 +137,61 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
 
     private final AtomicBoolean initialStateSent = new AtomicBoolean();
 
+    private volatile boolean rejoinOnMasterGone;
 
     @Nullable
     private NodeService nodeService;
 
+    private final BlockingQueue<Tuple<DiscoveryNode, MembershipAction.JoinCallback>> processJoinRequests = ConcurrentCollections.newBlockingQueue();
+
     @Inject
     public ZenDiscovery(Settings settings, ClusterName clusterName, ThreadPool threadPool,
                         TransportService transportService, ClusterService clusterService, NodeSettingsService nodeSettingsService,
-                        DiscoveryNodeService discoveryNodeService, ZenPingService pingService, Version version, DiscoverySettings discoverySettings) {
+                        DiscoveryNodeService discoveryNodeService, ZenPingService pingService, ElectMasterService electMasterService, Version version,
+                        DiscoverySettings discoverySettings) {
         super(settings);
         this.clusterName = clusterName;
         this.threadPool = threadPool;
         this.clusterService = clusterService;
         this.transportService = transportService;
         this.discoveryNodeService = discoveryNodeService;
+        this.discoverySettings = discoverySettings;
         this.pingService = pingService;
         this.version = version;
+        this.electMaster = electMasterService;
 
-        // also support direct discovery.zen settings, for cases when it gets extended
-        this.pingTimeout = settings.getAsTime("discovery.zen.ping.timeout", settings.getAsTime("discovery.zen.ping_timeout", componentSettings.getAsTime("ping_timeout", componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3)))));
-        this.joinTimeout = settings.getAsTime("discovery.zen.join_timeout", TimeValue.timeValueMillis(pingTimeout.millis() * 20));
-        this.sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true);
+        // keep using componentSettings for BWC, in case this class gets extended.
+        TimeValue pingTimeout = componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3));
+        pingTimeout = componentSettings.getAsTime("ping_timeout", pingTimeout);
+        pingTimeout = settings.getAsTime("discovery.zen.ping_timeout", pingTimeout);
+        this.pingTimeout = settings.getAsTime(SETTING_PING_TIMEOUT, pingTimeout);
 
-        this.masterElectionFilterClientNodes = settings.getAsBoolean("discovery.zen.master_election.filter_client", true);
-        this.masterElectionFilterDataNodes = settings.getAsBoolean("discovery.zen.master_election.filter_data", false);
+        this.joinTimeout = settings.getAsTime(SETTING_JOIN_TIMEOUT, TimeValue.timeValueMillis(pingTimeout.millis() * 20));
+        this.joinRetryAttempts = settings.getAsInt(SETTING_JOIN_RETRY_ATTEMPTS, 3);
+        this.joinRetryDelay = settings.getAsTime(SETTING_JOIN_RETRY_DELAY, TimeValue.timeValueMillis(100));
+        this.maxPingsFromAnotherMaster = settings.getAsInt(SETTING_MAX_PINGS_FROM_ANOTHER_MASTER, 3);
+        this.sendLeaveRequest = settings.getAsBoolean(SETTING_SEND_LEAVE_REQUEST, true);
+
+        this.masterElectionFilterClientNodes = settings.getAsBoolean(SETTING_MASTER_ELECTION_FILTER_CLIENT, true);
+        this.masterElectionFilterDataNodes = settings.getAsBoolean(SETTING_MASTER_ELECTION_FILTER_DATA, false);
+        this.rejoinOnMasterGone = settings.getAsBoolean(SETTING_REJOIN_ON_MASTER_GONE, true);
+
+        if (this.joinRetryAttempts < 1) { // at least one join attempt is required
+            throw new ElasticsearchIllegalArgumentException("'" + SETTING_JOIN_RETRY_ATTEMPTS + "' must be a positive number. got [" + this.joinRetryAttempts + "]");
+        }
+        if (this.maxPingsFromAnotherMaster < 1) {
+            throw new ElasticsearchIllegalArgumentException("'" + SETTING_MAX_PINGS_FROM_ANOTHER_MASTER + "' must be a positive number. got [" + this.maxPingsFromAnotherMaster + "]");
+        }
 
         logger.debug("using ping.timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
 
-        this.electMaster = new ElectMasterService(settings);
         nodeSettingsService.addListener(new ApplySettings());
 
-        this.masterFD = new MasterFaultDetection(settings, threadPool, transportService, this);
+        this.masterFD = new MasterFaultDetection(settings, threadPool, transportService, this, clusterName);
         this.masterFD.addListener(new MasterNodeFailureListener());
 
-        this.nodesFD = new NodesFaultDetection(settings, threadPool, transportService);
-        this.nodesFD.addListener(new NodeFailureListener());
+        this.nodesFD = new NodesFaultDetection(settings, threadPool, transportService, clusterName);
+        this.nodesFD.addListener(new NodeFaultDetectionListener());
 
         this.publishClusterState = new PublishClusterStateAction(settings, transportService, this, new NewClusterStateListener(), discoverySettings, clusterName);
         this.pingService.setNodesProvider(this);
@@ -178,7 +217,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         final String nodeId = DiscoveryService.generateNodeId(settings);
         localNode = new DiscoveryNode(settings.get("name"), nodeId, transportService.boundAddress().publishAddress(), nodeAttributes, version);
         latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build();
-        nodesFD.updateNodes(latestDiscoNodes);
+        nodesFD.updateNodes(latestDiscoNodes, ClusterState.UNKNOWN_VERSION);
         pingService.start();
 
         // do the join on a different thread, the DiscoveryService waits for 30s anyhow till it is discovered
@@ -272,7 +311,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
             throw new ElasticsearchIllegalStateException("Shouldn't publish state when not master");
         }
         latestDiscoNodes = clusterState.nodes();
-        nodesFD.updateNodes(clusterState.nodes());
+        nodesFD.updateNodes(clusterState.nodes(), clusterState.version());
         publishClusterState.publish(clusterState, ackListener);
     }
 
@@ -295,6 +334,15 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         });
     }
 
+
+    /**
+     * Returns true if {@code currentJoinThread} is set, i.e. a background thread for (re)joining the cluster is currently active.
+     * Used for testing.
+     */
+    public boolean joiningCluster() {
+        return currentJoinThread != null;
+    }
+
     private void innerJoinCluster() {
         boolean retry = true;
         while (retry) {
@@ -311,18 +359,24 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
             if (localNode.equals(masterNode)) {
                 this.master = true;
                 nodesFD.start(); // start the nodes FD
-                clusterService.submitStateUpdateTask("zen-disco-join (elected_as_master)", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
+                clusterService.submitStateUpdateTask("zen-disco-join (elected_as_master)", Priority.URGENT, new ProcessedClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) {
-                        DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder()
+                        // Take into account the previous known nodes, if they happen not to be available
+                        // then fault detection will remove these nodes.
+                        DiscoveryNodes.Builder builder = new DiscoveryNodes.Builder(latestDiscoNodes)
                                 .localNodeId(localNode.id())
                                 .masterNodeId(localNode.id())
                                         // put our local node
                                 .put(localNode);
                         // update the fact that we are the master...
                         latestDiscoNodes = builder.build();
-                        ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(NO_MASTER_BLOCK).build();
-                        return ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build();
+                        ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks()).removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
+                        currentState = ClusterState.builder(currentState).nodes(latestDiscoNodes).blocks(clusterBlocks).build();
+
+                        // eagerly run reroute to remove dead nodes from routing table
+                        RoutingAllocation.Result result = allocationService.reroute(currentState);
+                        return ClusterState.builder(currentState).routingResult(result).build();
                     }
 
                     @Override
@@ -337,30 +391,18 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                 });
             } else {
                 this.master = false;
-                try {
-                    // first, make sure we can connect to the master
-                    transportService.connectToNode(masterNode);
-                } catch (Exception e) {
-                    logger.warn("failed to connect to master [{}], retrying...", e, masterNode);
-                    retry = true;
-                    continue;
-                }
                 // send join request
-                try {
-                    membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout);
-                } catch (Exception e) {
-                    if (e instanceof ElasticsearchException) {
-                        logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage());
-                    } else {
-                        logger.info("failed to send join request to master [{}], reason [{}]", masterNode, e.getMessage());
-                    }
-                    if (logger.isTraceEnabled()) {
-                        logger.trace("detailed failed reason", e);
-                    }
-                    // failed to send the join request, retry
+                retry = !joinElectedMaster(masterNode);
+                if (retry) {
+                    continue;
+                }
+
+                if (latestDiscoNodes.masterNode() == null) {
+                    logger.debug("no master node is set, despite of join request completing. retrying pings");
                     retry = true;
                     continue;
                 }
+
                 masterFD.start(masterNode, "initial_join");
                 // no need to submit the received cluster state, we will get it from the master when it publishes
                 // the fact that we joined
@@ -368,6 +410,52 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         }
     }
 
+    /**
+     * Connects to the given master and sends it a blocking join request, retrying on illegal-state failures.
+     *
+     * @return true if the join request completed; false if the connection, the request or all retry attempts failed
+     */
+    private boolean joinElectedMaster(DiscoveryNode masterNode) {
+        try {
+            // first, make sure we can connect to the master
+            transportService.connectToNode(masterNode);
+        } catch (Exception e) {
+            logger.warn("failed to connect to master [{}], retrying...", e, masterNode);
+            return false;
+        }
+        int joinAttempt = 0; // we retry on illegal state if the master is not yet ready
+        while (true) {
+            try {
+                logger.trace("joining master {}", masterNode);
+                membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout);
+                return true;
+            } catch (Throwable t) {
+                Throwable unwrap = ExceptionsHelper.unwrapCause(t);
+                if (unwrap instanceof ElasticsearchIllegalStateException) {
+                    if (++joinAttempt == this.joinRetryAttempts) { // configured number of attempts reached, give up
+                        logger.info("failed to send join request to master [{}], reason [{}], tried [{}] times", masterNode, ExceptionsHelper.detailedMessage(t), joinAttempt);
+                        return false;
+                    } else {
+                        logger.trace("master {} failed with [{}]. retrying... (attempts done: [{}])", masterNode, ExceptionsHelper.detailedMessage(t), joinAttempt);
+                    }
+                } else { // any other failure is not retried here
+                    if (logger.isTraceEnabled()) {
+                        logger.trace("failed to send join request to master [{}]", t, masterNode);
+                    } else {
+                        logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ExceptionsHelper.detailedMessage(t));
+                    }
+                    return false;
+                }
+            }
+
+            try {
+                Thread.sleep(this.joinRetryDelay.millis()); // pause for joinRetryDelay before the next attempt
+            } catch (InterruptedException e) {
+                Thread.currentThread().interrupt(); // restore the interrupt flag; the loop still continues
+            }
+        }
+    }
+
     private void handleLeaveRequest(final DiscoveryNode node) {
         if (lifecycleState() != Lifecycle.State.STARTED) {
             // not started, ignore a node failure
@@ -389,6 +477,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                     return ClusterState.builder(currentState).routingResult(routingResult).build();
                 }
 
+                @Override
+                public void onNoLongerMaster(String source) {
+                    // ignoring (already logged)
+                }
+
                 @Override
                 public void onFailure(String source, Throwable t) {
                     logger.error("unexpected failure during [{}]", t, source);
@@ -424,6 +517,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                 return ClusterState.builder(currentState).routingResult(routingResult).build();
             }
 
+            @Override
+            public void onNoLongerMaster(String source) {
+                // already logged
+            }
+
             @Override
             public void onFailure(String source, Throwable t) {
                 logger.error("unexpected failure during [{}]", t, source);
@@ -457,6 +555,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                 return currentState;
             }
 
+
+            @Override
+            public void onNoLongerMaster(String source) {
+                // ignoring (already logged)
+            }
+
             @Override
             public void onFailure(String source, Throwable t) {
                 logger.error("unexpected failure during [{}]", t, source);
@@ -481,7 +585,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
 
         logger.info("master_left [{}], reason [{}]", masterNode, reason);
 
-        clusterService.submitStateUpdateTask("zen-disco-master_failed (" + masterNode + ")", Priority.IMMEDIATE, new ProcessedClusterStateUpdateTask() {
+        clusterService.submitStateUpdateTask("zen-disco-master_failed (" + masterNode + ")", Priority.IMMEDIATE, new ProcessedClusterStateNonMasterUpdateTask() {
             @Override
             public ClusterState execute(ClusterState currentState) {
                 if (!masterNode.id().equals(currentState.nodes().masterNodeId())) {
@@ -493,6 +597,16 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                         // make sure the old master node, which has failed, is not part of the nodes we publish
                         .remove(masterNode.id())
                         .masterNodeId(null).build();
+                latestDiscoNodes = discoveryNodes;
+
+                // flush any pending cluster states from old master, so it will not be set as master again
+                ArrayList<ProcessClusterState> pendingNewClusterStates = new ArrayList<>();
+                processNewClusterStates.drainTo(pendingNewClusterStates);
+                logger.trace("removed [{}] pending cluster states", pendingNewClusterStates.size());
+
+                if (rejoinOnMasterGone) {
+                    return rejoin(ClusterState.builder(currentState).nodes(discoveryNodes).build(), "master left (reason = " + reason + ")");
+                }
 
                 if (!electMaster.hasEnoughMasterNodes(discoveryNodes)) {
                     return rejoin(ClusterState.builder(currentState).nodes(discoveryNodes).build(), "not enough master nodes after master left (reason = " + reason + ")");
@@ -561,29 +675,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
             clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
                 @Override
                 public ClusterState execute(ClusterState currentState) {
-                    if (newState.version() > currentState.version()) {
-                        logger.warn("received cluster state from [{}] which is also master but with a newer cluster_state, rejoining to cluster...", newState.nodes().masterNode());
-                        return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]");
-                    } else {
-                        logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode());
-
-                        try {
-                            // make sure we're connected to this node (connect to node does nothing if we're already connected)
-                            // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node
-                            // in the past (after a master failure, for example)
-                            transportService.connectToNode(newState.nodes().masterNode());
-                            transportService.sendRequest(newState.nodes().masterNode(), DISCOVERY_REJOIN_ACTION_NAME, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
-                                @Override
-                                public void handleException(TransportException exp) {
-                                    logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode());
-                                }
-                            });
-                        } catch (Exception e) {
-                            logger.warn("failed to send rejoin request to [{}]", e, newState.nodes().masterNode());
-                        }
-
-                        return currentState;
-                    }
+                    return handleAnotherMaster(currentState, newState.nodes().masterNode(), newState.version(), "via a new cluster state");
                 }
 
                 @Override
@@ -610,7 +702,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                 final ProcessClusterState processClusterState = new ProcessClusterState(newClusterState, newStateProcessed);
                 processNewClusterStates.add(processClusterState);
 
-                clusterService.submitStateUpdateTask("zen-disco-receive(from master [" + newClusterState.nodes().masterNode() + "])", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
+
+                assert newClusterState.nodes().masterNode() != null : "received a cluster state without a master";
+                assert !newClusterState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock()) : "received a cluster state with a master block";
+
+                clusterService.submitStateUpdateTask("zen-disco-receive(from master [" + newClusterState.nodes().masterNode() + "])", Priority.URGENT, new ProcessedClusterStateNonMasterUpdateTask() {
                     @Override
                     public ClusterState execute(ClusterState currentState) {
                         // we already processed it in a previous event
@@ -642,6 +738,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
 
                             // we are going to use it for sure, poll (remove) it
                             potentialState = processNewClusterStates.poll();
+                            if (potentialState == null) {
+                                // might happen if the queue is drained
+                                break;
+                            }
+
                             potentialState.processed = true;
 
                             if (potentialState.clusterState.version() > stateToProcess.clusterState.version()) {
@@ -670,7 +771,16 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                             masterFD.restart(latestDiscoNodes.masterNode(), "new cluster state received and we are monitoring the wrong master [" + masterFD.masterNode() + "]");
                         }
 
+                        if (currentState.blocks().hasGlobalBlock(discoverySettings.getNoMasterBlock())) {
+                            // it's a fresh update from the master as we transition from a state of not having a master to having one
+                            logger.debug("got first state from fresh master [{}]", updatedState.nodes().masterNodeId());
+                            return updatedState;
+                        }
+
+
+                        // some optimizations to make sure we keep old objects where possible
                         ClusterState.Builder builder = ClusterState.builder(updatedState);
+
                         // if the routing table did not change, use the original one
                         if (updatedState.routingTable().version() == currentState.routingTable().version()) {
                             builder.routingTable(currentState.routingTable());
@@ -726,37 +836,75 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
             // validate the join request, will throw a failure if it fails, which will get back to the
             // node calling the join request
             membership.sendValidateJoinRequestBlocking(node, joinTimeout);
-
+            processJoinRequests.add(new Tuple<>(node, callback));
             clusterService.submitStateUpdateTask("zen-disco-receive(join from node[" + node + "])", Priority.IMMEDIATE, new ProcessedClusterStateUpdateTask() {
+
+                private final List<Tuple<DiscoveryNode, MembershipAction.JoinCallback>> drainedTasks = new ArrayList<>();
+
                 @Override
                 public ClusterState execute(ClusterState currentState) {
-                    if (currentState.nodes().nodeExists(node.id())) {
-                        // the node already exists in the cluster
-                        logger.info("received a join request for an existing node [{}]", node);
-                        // still send a new cluster state, so it will be re published and possibly update the other node
-                        return ClusterState.builder(currentState).build();
+                    processJoinRequests.drainTo(drainedTasks);
+                    if (drainedTasks.isEmpty()) {
+                        return currentState;
                     }
-                    DiscoveryNodes.Builder builder = DiscoveryNodes.builder(currentState.nodes());
-                    for (DiscoveryNode existingNode : currentState.nodes()) {
-                        if (node.address().equals(existingNode.address())) {
-                            builder.remove(existingNode.id());
-                            logger.warn("received join request from node [{}], but found existing node {} with same address, removing existing node", node, existingNode);
+
+                    boolean modified = false;
+                    DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentState.nodes());
+                    for (Tuple<DiscoveryNode, MembershipAction.JoinCallback> task : drainedTasks) {
+                        DiscoveryNode node = task.v1();
+                        if (currentState.nodes().nodeExists(node.id())) {
+                            logger.debug("received a join request for an existing node [{}]", node);
+                        } else {
+                            modified = true;
+                            nodesBuilder.put(node);
+                            for (DiscoveryNode existingNode : currentState.nodes()) {
+                                if (node.address().equals(existingNode.address())) {
+                                    nodesBuilder.remove(existingNode.id());
+                                    logger.warn("received join request from node [{}], but found existing node {} with same address, removing existing node", node, existingNode);
+                                }
+                            }
+                        }
+                    }
+
+                    ClusterState.Builder stateBuilder = ClusterState.builder(currentState);
+                    if (modified) {
+                        latestDiscoNodes = nodesBuilder.build();
+                        stateBuilder.nodes(latestDiscoNodes);
+                    }
+                    return stateBuilder.build();
+                }
+
+                @Override
+                public void onNoLongerMaster(String source) {
+                    Exception e = new EsRejectedExecutionException("no longer master. source: [" + source + "]");
+                    innerOnFailure(e);
+                }
+
+                void innerOnFailure(Throwable t) {
+                    for (Tuple<DiscoveryNode, MembershipAction.JoinCallback> drainedTask : drainedTasks) {
+                        try {
+                            drainedTask.v2().onFailure(t);
+                        } catch (Exception e) {
+                            logger.error("error during task failure", e);
                         }
                     }
-                    latestDiscoNodes = builder.build();
-                    // add the new node now (will update latestDiscoNodes on publish)
-                    return ClusterState.builder(currentState).nodes(latestDiscoNodes.newNode(node)).build();
                 }
 
                 @Override
                 public void onFailure(String source, Throwable t) {
                     logger.error("unexpected failure during [{}]", t, source);
-                    callback.onFailure(t);
+                    innerOnFailure(t);
                 }
 
                 @Override
                 public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
-                    callback.onSuccess();
+                    for (Tuple<DiscoveryNode, MembershipAction.JoinCallback> drainedTask : drainedTasks) {
+                        try {
+                            drainedTask.v2().onSuccess();
+                        } catch (Exception e) {
+                            logger.error("unexpected error during [{}]", e, source);
+                        }
+                    }
                 }
             });
         }
@@ -807,35 +955,36 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         List<DiscoveryNode> pingMasters = newArrayList();
         for (ZenPing.PingResponse pingResponse : pingResponses) {
             if (pingResponse.master() != null) {
-                pingMasters.add(pingResponse.master());
+                // We can't include the local node in the pingMasters list, otherwise we may end up electing ourselves without
+                // any checks / verifications from other nodes in ZenDiscovery#innerJoinCluster()
+                if (!localNode.equals(pingResponse.master())) {
+                    pingMasters.add(pingResponse.master());
+                }
             }
         }
 
         Set<DiscoveryNode> possibleMasterNodes = Sets.newHashSet();
-        possibleMasterNodes.add(localNode);
+        if (localNode.masterNode()) {
+            possibleMasterNodes.add(localNode);
+        }
         for (ZenPing.PingResponse pingResponse : pingResponses) {
             possibleMasterNodes.add(pingResponse.target());
         }
-        // if we don't have enough master nodes, we bail, even if we get a response that indicates
-        // there is a master by other node, we don't see enough...
-        if (!electMaster.hasEnoughMasterNodes(possibleMasterNodes)) {
-            logger.trace("not enough master nodes [{}]", possibleMasterNodes);
-            return null;
-        }
 
         if (pingMasters.isEmpty()) {
-            // lets tie break between discovered nodes
-            DiscoveryNode electedMaster = electMaster.electMaster(possibleMasterNodes);
-            if (localNode.equals(electedMaster)) {
-                return localNode;
+            // if we don't have enough master nodes, we bail, because there are not enough master nodes to elect from
+            if (electMaster.hasEnoughMasterNodes(possibleMasterNodes)) {
+                return electMaster.electMaster(possibleMasterNodes);
+            } else {
+                logger.trace("not enough master nodes [{}]", possibleMasterNodes);
+                return null;
             }
         } else {
-            DiscoveryNode electedMaster = electMaster.electMaster(pingMasters);
-            if (electedMaster != null) {
-                return electedMaster;
-            }
+
+            assert !pingMasters.contains(localNode) : "local node should never be elected as master when other nodes indicate an active master";
+            // let's tie break between the masters reported by other nodes
+            return electMaster.electMaster(pingMasters);
         }
-        return null;
     }
 
     private ClusterState rejoin(ClusterState clusterState, String reason) {
@@ -845,28 +994,45 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         master = false;
 
         ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(clusterState.blocks())
-                .addGlobalBlock(NO_MASTER_BLOCK)
-                .addGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK)
+                .addGlobalBlock(discoverySettings.getNoMasterBlock())
                 .build();
 
-        // clear the routing table, we have no master, so we need to recreate the routing when we reform the cluster
-        RoutingTable routingTable = RoutingTable.builder().build();
-        // we also clean the metadata, since we are going to recover it if we become master
-        MetaData metaData = MetaData.builder().build();
-
         // clean the nodes, we are now not connected to anybody, since we try and reform the cluster
-        latestDiscoNodes = new DiscoveryNodes.Builder().put(localNode).localNodeId(localNode.id()).build();
+        latestDiscoNodes = new DiscoveryNodes.Builder(latestDiscoNodes).masterNodeId(null).build();
 
         asyncJoinCluster();
 
         return ClusterState.builder(clusterState)
                 .blocks(clusterBlocks)
                 .nodes(latestDiscoNodes)
-                .routingTable(routingTable)
-                .metaData(metaData)
                 .build();
     }
 
+    private ClusterState handleAnotherMaster(ClusterState localClusterState, final DiscoveryNode otherMaster, long otherClusterStateVersion, String reason) {
+        assert master : "handleAnotherMaster called but current node is not a master";
+        if (otherClusterStateVersion > localClusterState.version()) {
+            return rejoin(localClusterState, "zen-disco-discovered another master with a new cluster_state [" + otherMaster + "][" + reason + "]");
+        } else {
+            logger.warn("discovered [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster ([{}])", otherMaster, otherMaster, reason);
+            try {
+                // make sure we're connected to this node (connect to node does nothing if we're already connected)
+                // since the network connections are asymmetric, it may be that we received a state but have disconnected from the node
+                // in the past (after a master failure, for example)
+                transportService.connectToNode(otherMaster);
+                transportService.sendRequest(otherMaster, DISCOVERY_REJOIN_ACTION_NAME, new RejoinClusterRequest(localClusterState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
+
+                    @Override
+                    public void handleException(TransportException exp) {
+                        logger.warn("failed to send rejoin request to [{}]", exp, otherMaster);
+                    }
+                });
+            } catch (Exception e) {
+                logger.warn("failed to send rejoin request to [{}]", e, otherMaster);
+            }
+            return localClusterState;
+        }
+    }
+
     private void sendInitialStateEventIfNeeded() {
         if (initialStateSent.compareAndSet(false, true)) {
             for (InitialStateDiscoveryListener listener : initialStateListeners) {
@@ -895,12 +1061,48 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         }
     }
 
-    private class NodeFailureListener implements NodesFaultDetection.Listener {
+    private class NodeFaultDetectionListener extends NodesFaultDetection.Listener {
+
+        private final AtomicInteger pingsWhileMaster = new AtomicInteger(0);
 
         @Override
         public void onNodeFailure(DiscoveryNode node, String reason) {
             handleNodeFailure(node, reason);
         }
+
+        @Override
+        public void onPingReceived(final NodesFaultDetection.PingRequest pingRequest) {
+            // if we are master, we don't expect any fault detection pings from another node. If we get one,
+            // it means we potentially have two masters in the cluster.
+            if (!master) {
+                pingsWhileMaster.set(0);
+                return;
+            }
+
+            // nodes pre 1.4.0 do not send this information
+            if (pingRequest.masterNode() == null) {
+                return;
+            }
+
+            if (pingsWhileMaster.incrementAndGet() < maxPingsFromAnotherMaster) {
+                logger.trace("got a ping from another master {}. current ping count: [{}]", pingRequest.masterNode(), pingsWhileMaster.get());
+                return;
+            }
+            logger.debug("got a ping from another master {}. resolving who should rejoin. current ping count: [{}]", pingRequest.masterNode(), pingsWhileMaster.get());
+            clusterService.submitStateUpdateTask("ping from another master", Priority.URGENT, new ClusterStateUpdateTask() {
+
+                @Override
+                public ClusterState execute(ClusterState currentState) throws Exception {
+                    pingsWhileMaster.set(0);
+                    return handleAnotherMaster(currentState, pingRequest.masterNode(), pingRequest.clusterStateVersion(), "node fd ping");
+                }
+
+                @Override
+                public void onFailure(String source, Throwable t) {
+                    logger.debug("unexpected error during cluster state update task after pings from another master", t);
+                }
+            });
+        }
     }
 
     private class MasterNodeFailureListener implements MasterFaultDetection.Listener {
@@ -922,6 +1124,10 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
         }
     }
 
+    boolean isRejoinOnMasterGone() {
+        return rejoinOnMasterGone;
+    }
+
     static class RejoinClusterRequest extends TransportRequest {
 
         private String fromNodeId;
@@ -955,7 +1161,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
 
         @Override
         public void messageReceived(final RejoinClusterRequest request, final TransportChannel channel) throws Exception {
-            clusterService.submitStateUpdateTask("received a request to rejoin the cluster from [" + request.fromNodeId + "]", Priority.URGENT, new ClusterStateUpdateTask() {
+            clusterService.submitStateUpdateTask("received a request to rejoin the cluster from [" + request.fromNodeId + "]", Priority.URGENT, new ClusterStateNonMasterUpdateTask() {
                 @Override
                 public ClusterState execute(ClusterState currentState) {
                     try {
@@ -966,6 +1172,11 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                     return rejoin(currentState, "received a request to rejoin the cluster from [" + request.fromNodeId + "]");
                 }
 
+                @Override
+                public void onNoLongerMaster(String source) {
+                    // already logged
+                }
+
                 @Override
                 public void onFailure(String source, Throwable t) {
                     logger.error("unexpected failure during [{}]", t, source);
@@ -989,6 +1200,12 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
                         ZenDiscovery.this.electMaster.minimumMasterNodes(), minimumMasterNodes);
                 handleMinimumMasterNodesChanged(minimumMasterNodes);
             }
+
+            boolean rejoinOnMasterGone = settings.getAsBoolean(SETTING_REJOIN_ON_MASTER_GONE, ZenDiscovery.this.rejoinOnMasterGone);
+            if (rejoinOnMasterGone != ZenDiscovery.this.rejoinOnMasterGone) {
+                logger.info("updating {} from [{}] to [{}]", SETTING_REJOIN_ON_MASTER_GONE, ZenDiscovery.this.rejoinOnMasterGone, rejoinOnMasterGone);
+                ZenDiscovery.this.rejoinOnMasterGone = rejoinOnMasterGone;
+            }
         }
     }
 }
diff --git a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java
index e67c4e2af39..33987662bfa 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscoveryModule.java
@@ -23,6 +23,7 @@ import com.google.common.collect.Lists;
 import org.elasticsearch.common.inject.AbstractModule;
 import org.elasticsearch.common.inject.multibindings.Multibinder;
 import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.discovery.zen.elect.ElectMasterService;
 import org.elasticsearch.discovery.zen.ping.ZenPingService;
 import org.elasticsearch.discovery.zen.ping.unicast.UnicastHostsProvider;
 
@@ -44,6 +45,7 @@ public class ZenDiscoveryModule extends AbstractModule {
 
     @Override
     protected void configure() {
+        bind(ElectMasterService.class).asEagerSingleton();
         bind(ZenPingService.class).asEagerSingleton();
         Multibinder<UnicastHostsProvider> unicastHostsProviderMultibinder = Multibinder.newSetBinder(binder(), UnicastHostsProvider.class);
         for (Class<? extends UnicastHostsProvider> unicastHostProvider : unicastHostProviders) {
diff --git a/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java b/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java
index bcfa1dc2f02..9ba26387ec5 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/elect/ElectMasterService.java
@@ -24,12 +24,10 @@ import com.google.common.collect.Lists;
 import org.apache.lucene.util.CollectionUtil;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.common.component.AbstractComponent;
+import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 
-import java.util.Arrays;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
 
 /**
  *
@@ -42,6 +40,7 @@ public class ElectMasterService extends AbstractComponent {
 
     private volatile int minimumMasterNodes;
 
+    @Inject
     public ElectMasterService(Settings settings) {
         super(settings);
         this.minimumMasterNodes = settings.getAsInt(DISCOVERY_ZEN_MINIMUM_MASTER_NODES, -1);
@@ -69,6 +68,18 @@ public class ElectMasterService extends AbstractComponent {
         return count >= minimumMasterNodes;
     }
 
+    /**
+     * Returns the given nodes sorted by likelihood of being elected as master, most likely first.
+     * Non-master nodes are not removed but are rather placed at the end.
+     * @param nodes the nodes to sort
+     * @return a new list containing the given nodes in sorted order
+     */
+    public List<DiscoveryNode> sortByMasterLikelihood(Iterable<DiscoveryNode> nodes) {
+        ArrayList<DiscoveryNode> sortedNodes = Lists.newArrayList(nodes);
+        CollectionUtil.introSort(sortedNodes, nodeComparator);
+        return sortedNodes;
+    }
+
     /**
      * Returns a list of the next possible masters.
      */
@@ -120,6 +131,12 @@ public class ElectMasterService extends AbstractComponent {
 
         @Override
         public int compare(DiscoveryNode o1, DiscoveryNode o2) {
+            if (o1.masterNode() && !o2.masterNode()) {
+                return -1;
+            }
+            if (!o1.masterNode() && o2.masterNode()) {
+                return 1;
+            }
             return o1.id().compareTo(o2.id());
         }
     }
diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/FaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/FaultDetection.java
new file mode 100644
index 00000000000..d3e644f2166
--- /dev/null
+++ b/src/main/java/org/elasticsearch/discovery/zen/fd/FaultDetection.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.discovery.zen.fd;
+
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.component.AbstractComponent;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportConnectionListener;
+import org.elasticsearch.transport.TransportService;
+
+import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
+
+/**
+ * A base class for {@link org.elasticsearch.discovery.zen.fd.MasterFaultDetection} and {@link org.elasticsearch.discovery.zen.fd.NodesFaultDetection},
+ * making sure both use the same settings.
+ */
+public abstract class FaultDetection extends AbstractComponent {
+
+    public static final String SETTING_CONNECT_ON_NETWORK_DISCONNECT = "discovery.zen.fd.connect_on_network_disconnect";
+    public static final String SETTING_PING_INTERVAL = "discovery.zen.fd.ping_interval";
+    public static final String SETTING_PING_TIMEOUT = "discovery.zen.fd.ping_timeout";
+    public static final String SETTING_PING_RETRIES = "discovery.zen.fd.ping_retries";
+    public static final String SETTING_REGISTER_CONNECTION_LISTENER = "discovery.zen.fd.register_connection_listener";
+
+    protected final ThreadPool threadPool;
+    protected final ClusterName clusterName;
+    protected final TransportService transportService;
+
+    // used mainly for testing, should always be true
+    protected final boolean registerConnectionListener;
+    protected final FDConnectionListener connectionListener;
+    protected final boolean connectOnNetworkDisconnect;
+
+    protected final TimeValue pingInterval;
+    protected final TimeValue pingRetryTimeout;
+    protected final int pingRetryCount;
+
+    public FaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName) {
+        super(settings);
+        this.threadPool = threadPool;
+        this.transportService = transportService;
+        this.clusterName = clusterName;
+
+        this.connectOnNetworkDisconnect = settings.getAsBoolean(SETTING_CONNECT_ON_NETWORK_DISCONNECT, false);
+        this.pingInterval = settings.getAsTime(SETTING_PING_INTERVAL, timeValueSeconds(1));
+        this.pingRetryTimeout = settings.getAsTime(SETTING_PING_TIMEOUT, timeValueSeconds(30));
+        this.pingRetryCount = settings.getAsInt(SETTING_PING_RETRIES, 3);
+        this.registerConnectionListener = settings.getAsBoolean(SETTING_REGISTER_CONNECTION_LISTENER, true);
+
+        this.connectionListener = new FDConnectionListener();
+        if (registerConnectionListener) {
+            transportService.addConnectionListener(connectionListener);
+        }
+    }
+
+    public void close() {
+        transportService.removeConnectionListener(connectionListener);
+    }
+
+    /**
+     * This method will be called when the {@link org.elasticsearch.transport.TransportService} raises a node disconnected event
+     */
+    abstract void handleTransportDisconnect(DiscoveryNode node);
+
+    private class FDConnectionListener implements TransportConnectionListener {
+        @Override
+        public void onNodeConnected(DiscoveryNode node) {
+        }
+
+        @Override
+        public void onNodeDisconnected(DiscoveryNode node) {
+            handleTransportDisconnect(node);
+        }
+    }
+
+}
diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java
index 26fd2b00e94..49709b7905b 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/fd/MasterFaultDetection.java
@@ -20,9 +20,10 @@
 package org.elasticsearch.discovery.zen.fd;
 
 import org.elasticsearch.ElasticsearchIllegalStateException;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
-import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.settings.Settings;
@@ -35,13 +36,12 @@ import java.io.IOException;
 import java.util.concurrent.CopyOnWriteArrayList;
 import java.util.concurrent.atomic.AtomicBoolean;
 
-import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
 import static org.elasticsearch.transport.TransportRequestOptions.options;
 
 /**
  * A fault detection that pings the master periodically to see if its alive.
  */
-public class MasterFaultDetection extends AbstractComponent {
+public class MasterFaultDetection extends FaultDetection {
 
     public static final String MASTER_PING_ACTION_NAME = "internal:discovery/zen/fd/master_ping";
 
@@ -52,29 +52,10 @@ public class MasterFaultDetection extends AbstractComponent {
         void onDisconnectedFromMaster();
     }
 
-    private final ThreadPool threadPool;
-
-    private final TransportService transportService;
-
     private final DiscoveryNodesProvider nodesProvider;
 
     private final CopyOnWriteArrayList<Listener> listeners = new CopyOnWriteArrayList<>();
 
-
-    private final boolean connectOnNetworkDisconnect;
-
-    private final TimeValue pingInterval;
-
-    private final TimeValue pingRetryTimeout;
-
-    private final int pingRetryCount;
-
-    // used mainly for testing, should always be true
-    private final boolean registerConnectionListener;
-
-
-    private final FDConnectionListener connectionListener;
-
     private volatile MasterPinger masterPinger;
 
     private final Object masterNodeMutex = new Object();
@@ -85,25 +66,13 @@ public class MasterFaultDetection extends AbstractComponent {
 
     private final AtomicBoolean notifiedMasterFailure = new AtomicBoolean();
 
-    public MasterFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, DiscoveryNodesProvider nodesProvider) {
-        super(settings);
-        this.threadPool = threadPool;
-        this.transportService = transportService;
+    public MasterFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService,
+                                DiscoveryNodesProvider nodesProvider, ClusterName clusterName) {
+        super(settings, threadPool, transportService, clusterName);
         this.nodesProvider = nodesProvider;
 
-        this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", true);
-        this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1));
-        this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30));
-        this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3);
-        this.registerConnectionListener = componentSettings.getAsBoolean("register_connection_listener", true);
-
         logger.debug("[master] uses ping_interval [{}], ping_timeout [{}], ping_retries [{}]", pingInterval, pingRetryTimeout, pingRetryCount);
 
-        this.connectionListener = new FDConnectionListener();
-        if (registerConnectionListener) {
-            transportService.addConnectionListener(connectionListener);
-        }
-
         transportService.registerHandler(MASTER_PING_ACTION_NAME, new MasterPingRequestHandler());
     }
 
@@ -155,7 +124,8 @@ public class MasterFaultDetection extends AbstractComponent {
             masterPinger.stop();
         }
         this.masterPinger = new MasterPinger();
-        // start the ping process
+
+        // we start pinging slightly later to allow the chosen master to complete its own master election
         threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
     }
 
@@ -181,13 +151,14 @@ public class MasterFaultDetection extends AbstractComponent {
     }
 
     public void close() {
+        super.close();
         stop("closing");
         this.listeners.clear();
-        transportService.removeConnectionListener(connectionListener);
         transportService.removeHandler(MASTER_PING_ACTION_NAME);
     }
 
-    private void handleTransportDisconnect(DiscoveryNode node) {
+    @Override
+    protected void handleTransportDisconnect(DiscoveryNode node) {
         synchronized (masterNodeMutex) {
             if (!node.equals(this.masterNode)) {
                 return;
@@ -200,7 +171,8 @@ public class MasterFaultDetection extends AbstractComponent {
                         masterPinger.stop();
                     }
                     this.masterPinger = new MasterPinger();
-                    threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
+                    // we use schedule with a 0 time value to run the pinger on the same pool it will run on later
+                    threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
                 } catch (Exception e) {
                     logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
                     notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
@@ -237,17 +209,6 @@ public class MasterFaultDetection extends AbstractComponent {
         }
     }
 
-    private class FDConnectionListener implements TransportConnectionListener {
-        @Override
-        public void onNodeConnected(DiscoveryNode node) {
-        }
-
-        @Override
-        public void onNodeDisconnected(DiscoveryNode node) {
-            handleTransportDisconnect(node);
-        }
-    }
-
     private class MasterPinger implements Runnable {
 
         private volatile boolean running = true;
@@ -268,8 +229,10 @@ public class MasterFaultDetection extends AbstractComponent {
                 threadPool.schedule(pingInterval, ThreadPool.Names.SAME, MasterPinger.this);
                 return;
             }
-            transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id()), options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout),
-                    new BaseTransportResponseHandler<MasterPingResponseResponse>() {
+            final MasterPingRequest request = new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id(), clusterName);
+            final TransportRequestOptions options = options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout);
+            transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, request, options, new BaseTransportResponseHandler<MasterPingResponseResponse>() {
+
                         @Override
                         public MasterPingResponseResponse newInstance() {
                             return new MasterPingResponseResponse();
@@ -326,7 +289,7 @@ public class MasterFaultDetection extends AbstractComponent {
                                         notifyMasterFailure(masterToPing, "failed to ping, tried [" + pingRetryCount + "] times, each with  maximum [" + pingRetryTimeout + "] timeout");
                                     } else {
                                         // resend the request, not reschedule, rely on send timeout
-                                        transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, new MasterPingRequest(nodesProvider.nodes().localNode().id(), masterToPing.id()), options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout), this);
+                                        transportService.sendRequest(masterToPing, MASTER_PING_ACTION_NAME, request, options, this);
                                     }
                                 }
                             }
@@ -349,6 +312,14 @@ public class MasterFaultDetection extends AbstractComponent {
     }
 
     static class NotMasterException extends ElasticsearchIllegalStateException {
+
+        NotMasterException(String msg) {
+            super(msg);
+        }
+
+        NotMasterException() {
+        }
+
         @Override
         public Throwable fillInStackTrace() {
             return null;
@@ -377,6 +348,13 @@ public class MasterFaultDetection extends AbstractComponent {
             if (!request.masterNodeId.equals(nodes.localNodeId())) {
                 throw new NotMasterException();
             }
+
+            // pings from nodes of version < 1.4.0 will have the cluster name set to null
+            if (request.clusterName != null && !request.clusterName.equals(clusterName)) {
+                logger.trace("master fault detection ping request is targeted for a different [{}] cluster then us [{}]", request.clusterName, clusterName);
+                throw new NotMasterException("master fault detection ping request is targeted for a different [" + request.clusterName + "] cluster then us [" + clusterName + "]");
+            }
+
             // if we are no longer master, fail...
             if (!nodes.localNodeMaster()) {
                 throw new NoLongerMasterException();
@@ -400,13 +378,15 @@ public class MasterFaultDetection extends AbstractComponent {
         private String nodeId;
 
         private String masterNodeId;
+        private ClusterName clusterName;
 
         private MasterPingRequest() {
         }
 
-        private MasterPingRequest(String nodeId, String masterNodeId) {
+        private MasterPingRequest(String nodeId, String masterNodeId, ClusterName clusterName) {
             this.nodeId = nodeId;
             this.masterNodeId = masterNodeId;
+            this.clusterName = clusterName;
         }
 
         @Override
@@ -414,6 +394,9 @@ public class MasterFaultDetection extends AbstractComponent {
             super.readFrom(in);
             nodeId = in.readString();
             masterNodeId = in.readString();
+            if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+                clusterName = ClusterName.readClusterName(in);
+            }
         }
 
         @Override
@@ -421,6 +404,9 @@ public class MasterFaultDetection extends AbstractComponent {
             super.writeTo(out);
             out.writeString(nodeId);
             out.writeString(masterNodeId);
+            if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+                clusterName.writeTo(out);
+            }
         }
     }
 
diff --git a/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java b/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java
index 6f4e403610c..90012099116 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/fd/NodesFaultDetection.java
@@ -20,9 +20,11 @@
 package org.elasticsearch.discovery.zen.fd;
 
 import org.elasticsearch.ElasticsearchIllegalStateException;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.ClusterState;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
-import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.settings.Settings;
@@ -35,68 +37,40 @@ import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.CopyOnWriteArrayList;
 
 import static org.elasticsearch.cluster.node.DiscoveryNodes.EMPTY_NODES;
-import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
 import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap;
 import static org.elasticsearch.transport.TransportRequestOptions.options;
 
 /**
  * A fault detection of multiple nodes.
  */
-public class NodesFaultDetection extends AbstractComponent {
+public class NodesFaultDetection extends FaultDetection {
 
     public static final String PING_ACTION_NAME = "internal:discovery/zen/fd/ping";
+    
+    public abstract static class Listener {
 
-    public static interface Listener {
+        public void onNodeFailure(DiscoveryNode node, String reason) {}
+
+        public void onPingReceived(PingRequest pingRequest) {}
 
-        void onNodeFailure(DiscoveryNode node, String reason);
     }
 
-    private final ThreadPool threadPool;
-
-    private final TransportService transportService;
-
-
-    private final boolean connectOnNetworkDisconnect;
-
-    private final TimeValue pingInterval;
-
-    private final TimeValue pingRetryTimeout;
-
-    private final int pingRetryCount;
-
-    // used mainly for testing, should always be true
-    private final boolean registerConnectionListener;
-
-
     private final CopyOnWriteArrayList<Listener> listeners = new CopyOnWriteArrayList<>();
 
     private final ConcurrentMap<DiscoveryNode, NodeFD> nodesFD = newConcurrentMap();
 
-    private final FDConnectionListener connectionListener;
-
     private volatile DiscoveryNodes latestNodes = EMPTY_NODES;
 
+    private volatile long clusterStateVersion = ClusterState.UNKNOWN_VERSION;
+
     private volatile boolean running = false;
 
-    public NodesFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService) {
-        super(settings);
-        this.threadPool = threadPool;
-        this.transportService = transportService;
-
-        this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", true);
-        this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1));
-        this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30));
-        this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3);
-        this.registerConnectionListener = componentSettings.getAsBoolean("register_connection_listener", true);
+    public NodesFaultDetection(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName) {
+        super(settings, threadPool, transportService, clusterName);
 
         logger.debug("[node  ] uses ping_interval [{}], ping_timeout [{}], ping_retries [{}]", pingInterval, pingRetryTimeout, pingRetryCount);
 
         transportService.registerHandler(PING_ACTION_NAME, new PingRequestHandler());
-
-        this.connectionListener = new FDConnectionListener();
-        if (registerConnectionListener) {
-            transportService.addConnectionListener(connectionListener);
-        }
     }
 
     public void addListener(Listener listener) {
@@ -107,9 +81,10 @@ public class NodesFaultDetection extends AbstractComponent {
         listeners.remove(listener);
     }
 
-    public void updateNodes(DiscoveryNodes nodes) {
+    public void updateNodes(DiscoveryNodes nodes, long clusterStateVersion) {
         DiscoveryNodes prevNodes = latestNodes;
         this.latestNodes = nodes;
+        this.clusterStateVersion = clusterStateVersion;
         if (!running) {
             return;
         }
@@ -121,7 +96,8 @@ public class NodesFaultDetection extends AbstractComponent {
             }
             if (!nodesFD.containsKey(newNode)) {
                 nodesFD.put(newNode, new NodeFD());
-                threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
+                // we schedule with a 0 time value so that the first ping runs on the pool, just as the later ones will
+                threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
             }
         }
         for (DiscoveryNode removedNode : delta.removedNodes()) {
@@ -146,12 +122,13 @@ public class NodesFaultDetection extends AbstractComponent {
     }
 
     public void close() {
+        super.close();
         stop();
         transportService.removeHandler(PING_ACTION_NAME);
-        transportService.removeConnectionListener(connectionListener);
     }
 
-    private void handleTransportDisconnect(DiscoveryNode node) {
+    @Override
+    protected void handleTransportDisconnect(DiscoveryNode node) {
         if (!latestNodes.nodeExists(node.id())) {
             return;
         }
@@ -167,7 +144,8 @@ public class NodesFaultDetection extends AbstractComponent {
             try {
                 transportService.connectToNode(node);
                 nodesFD.put(node, new NodeFD());
-                threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
+                // we schedule with a 0 time value so that the first ping runs on the pool, just as the later ones will
+                threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
             } catch (Exception e) {
                 logger.trace("[node  ] [{}] transport disconnected (with verified connect)", node);
                 notifyNodeFailure(node, "transport disconnected (with verified connect)");
@@ -189,6 +167,19 @@ public class NodesFaultDetection extends AbstractComponent {
         });
     }
 
+    private void notifyPingReceived(final PingRequest pingRequest) {
+        threadPool.generic().execute(new Runnable() {
+
+            @Override
+            public void run() {
+                for (Listener listener : listeners) {
+                    listener.onPingReceived(pingRequest);
+                }
+            }
+
+        });
+    }
+
     private class SendPingRequest implements Runnable {
 
         private final DiscoveryNode node;
@@ -202,8 +193,9 @@ public class NodesFaultDetection extends AbstractComponent {
             if (!running) {
                 return;
             }
-            transportService.sendRequest(node, PING_ACTION_NAME, new PingRequest(node.id()), options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout),
-                    new BaseTransportResponseHandler<PingResponse>() {
+            final PingRequest pingRequest = new PingRequest(node.id(), clusterName, latestNodes.localNode(), clusterStateVersion);
+            final TransportRequestOptions options = options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout);
+            transportService.sendRequest(node, PING_ACTION_NAME, pingRequest, options, new BaseTransportResponseHandler<PingResponse>() {
                         @Override
                         public PingResponse newInstance() {
                             return new PingResponse();
@@ -250,8 +242,7 @@ public class NodesFaultDetection extends AbstractComponent {
                                     }
                                 } else {
                                     // resend the request, not reschedule, rely on send timeout
-                                    transportService.sendRequest(node, PING_ACTION_NAME, new PingRequest(node.id()),
-                                            options().withType(TransportRequestOptions.Type.PING).withTimeout(pingRetryTimeout), this);
+                                    transportService.sendRequest(node, PING_ACTION_NAME, pingRequest, options, this);
                                 }
                             }
                         }
@@ -270,18 +261,6 @@ public class NodesFaultDetection extends AbstractComponent {
         volatile boolean running = true;
     }
 
-    private class FDConnectionListener implements TransportConnectionListener {
-        @Override
-        public void onNodeConnected(DiscoveryNode node) {
-        }
-
-        @Override
-        public void onNodeDisconnected(DiscoveryNode node) {
-            handleTransportDisconnect(node);
-        }
-    }
-
-
     class PingRequestHandler extends BaseTransportRequestHandler<PingRequest> {
 
         @Override
@@ -296,6 +275,15 @@ public class NodesFaultDetection extends AbstractComponent {
             if (!latestNodes.localNodeId().equals(request.nodeId)) {
                 throw new ElasticsearchIllegalStateException("Got pinged as node [" + request.nodeId + "], but I am node [" + latestNodes.localNodeId() + "]");
             }
+
+            // PingRequest will have clusterName set to null if it came from a node of version <1.4.0
+            if (request.clusterName != null && !request.clusterName.equals(clusterName)) {
+                // Don't introduce new exception for bwc reasons
+                throw new ElasticsearchIllegalStateException("Got pinged with cluster name [" + request.clusterName + "], but I'm part of cluster [" + clusterName + "]");
+            }
+
+            notifyPingReceived(request);
+
             channel.sendResponse(new PingResponse());
         }
 
@@ -306,28 +294,63 @@ public class NodesFaultDetection extends AbstractComponent {
     }
 
 
-    static class PingRequest extends TransportRequest {
+    public static class PingRequest extends TransportRequest {
 
         // the (assumed) node id we are pinging
         private String nodeId;
 
+        private ClusterName clusterName;
+
+        private DiscoveryNode masterNode;
+
+        private long clusterStateVersion = ClusterState.UNKNOWN_VERSION;
+
         PingRequest() {
         }
 
-        PingRequest(String nodeId) {
+        PingRequest(String nodeId, ClusterName clusterName, DiscoveryNode masterNode, long clusterStateVersion) {
             this.nodeId = nodeId;
+            this.clusterName = clusterName;
+            this.masterNode = masterNode;
+            this.clusterStateVersion = clusterStateVersion;
+        }
+
+        public String nodeId() {
+            return nodeId;
+        }
+
+        public ClusterName clusterName() {
+            return clusterName;
+        }
+
+        public DiscoveryNode masterNode() {
+            return masterNode;
+        }
+
+        public long clusterStateVersion() {
+            return clusterStateVersion;
         }
 
         @Override
         public void readFrom(StreamInput in) throws IOException {
             super.readFrom(in);
             nodeId = in.readString();
+            if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+                clusterName = ClusterName.readClusterName(in);
+                masterNode = DiscoveryNode.readNode(in);
+                clusterStateVersion = in.readLong();
+            }
         }
 
         @Override
         public void writeTo(StreamOutput out) throws IOException {
             super.writeTo(out);
             out.writeString(nodeId);
+            if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+                clusterName.writeTo(out);
+                masterNode.writeTo(out);
+                out.writeLong(clusterStateVersion);
+            }
         }
     }
 
diff --git a/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java b/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java
index 53ee9248eac..39f710f7acd 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/ping/ZenPingService.java
@@ -34,6 +34,7 @@ import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
 import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
 import org.elasticsearch.discovery.zen.DiscoveryNodesProvider;
+import org.elasticsearch.discovery.zen.elect.ElectMasterService;
 import org.elasticsearch.discovery.zen.ping.multicast.MulticastZenPing;
 import org.elasticsearch.discovery.zen.ping.unicast.UnicastHostsProvider;
 import org.elasticsearch.discovery.zen.ping.unicast.UnicastZenPing;
@@ -55,20 +56,20 @@ public class ZenPingService extends AbstractLifecycleComponent<ZenPing> implemen
 
     // here for backward comp. with discovery plugins
     public ZenPingService(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, NetworkService networkService,
-                          @Nullable Set<UnicastHostsProvider> unicastHostsProviders) {
-        this(settings, threadPool, transportService, clusterName, networkService, Version.CURRENT, unicastHostsProviders);
+                          ElectMasterService electMasterService, @Nullable Set<UnicastHostsProvider> unicastHostsProviders) {
+        this(settings, threadPool, transportService, clusterName, networkService, Version.CURRENT, electMasterService, unicastHostsProviders);
     }
 
     @Inject
     public ZenPingService(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, NetworkService networkService,
-                          Version version, @Nullable Set<UnicastHostsProvider> unicastHostsProviders) {
+                          Version version, ElectMasterService electMasterService, @Nullable Set<UnicastHostsProvider> unicastHostsProviders) {
         super(settings);
         ImmutableList.Builder<ZenPing> zenPingsBuilder = ImmutableList.builder();
         if (componentSettings.getAsBoolean("multicast.enabled", true)) {
             zenPingsBuilder.add(new MulticastZenPing(settings, threadPool, transportService, clusterName, networkService, version));
         }
         // always add the unicast hosts, so it will be able to receive unicast requests even when working in multicast
-        zenPingsBuilder.add(new UnicastZenPing(settings, threadPool, transportService, clusterName, version, unicastHostsProviders));
+        zenPingsBuilder.add(new UnicastZenPing(settings, threadPool, transportService, clusterName, version, electMasterService, unicastHostsProviders));
 
         this.zenPings = zenPingsBuilder.build();
     }
diff --git a/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java b/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java
index 25a43ead8ef..123f2d7fc7f 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPing.java
@@ -19,8 +19,12 @@
 
 package org.elasticsearch.discovery.zen.ping.unicast;
 
+import com.carrotsearch.hppc.cursors.ObjectCursor;
 import com.google.common.collect.Lists;
-import org.elasticsearch.*;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.ElasticsearchIllegalStateException;
+import org.elasticsearch.Version;
 import org.elasticsearch.cluster.ClusterName;
 import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
@@ -35,6 +39,7 @@ import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
 import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.common.util.concurrent.EsRejectedExecutionException;
 import org.elasticsearch.discovery.zen.DiscoveryNodesProvider;
+import org.elasticsearch.discovery.zen.elect.ElectMasterService;
 import org.elasticsearch.discovery.zen.ping.ZenPing;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
@@ -62,10 +67,11 @@ public class UnicastZenPing extends AbstractLifecycleComponent<ZenPing> implemen
     private final ThreadPool threadPool;
     private final TransportService transportService;
     private final ClusterName clusterName;
+    private final ElectMasterService electMasterService;
 
     private final int concurrentConnects;
 
-    private final DiscoveryNode[] nodes;
+    private final DiscoveryNode[] configuredTargetNodes;
 
     private volatile DiscoveryNodesProvider nodesProvider;
 
@@ -73,16 +79,18 @@ public class UnicastZenPing extends AbstractLifecycleComponent<ZenPing> implemen
 
     private final Map<Integer, ConcurrentMap<DiscoveryNode, PingResponse>> receivedResponses = newConcurrentMap();
 
-    // a list of temporal responses a node will return for a request (holds requests from other nodes)
+    // a list of temporal responses a node will return for a request (holds requests from other nodes)
     private final Queue<PingResponse> temporalResponses = ConcurrentCollections.newQueue();
 
     private final CopyOnWriteArrayList<UnicastHostsProvider> hostsProviders = new CopyOnWriteArrayList<>();
 
-    public UnicastZenPing(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName, Version version, @Nullable Set<UnicastHostsProvider> unicastHostsProviders) {
+    public UnicastZenPing(Settings settings, ThreadPool threadPool, TransportService transportService, ClusterName clusterName,
+                          Version version, ElectMasterService electMasterService, @Nullable Set<UnicastHostsProvider> unicastHostsProviders) {
         super(settings);
         this.threadPool = threadPool;
         this.transportService = transportService;
         this.clusterName = clusterName;
+        this.electMasterService = electMasterService;
 
         if (unicastHostsProviders != null) {
             for (UnicastHostsProvider unicastHostsProvider : unicastHostsProviders) {
@@ -99,20 +107,20 @@ public class UnicastZenPing extends AbstractLifecycleComponent<ZenPing> implemen
         List<String> hosts = Lists.newArrayList(hostArr);
         logger.debug("using initial hosts {}, with concurrent_connects [{}]", hosts, concurrentConnects);
 
-        List<DiscoveryNode> nodes = Lists.newArrayList();
+        List<DiscoveryNode> configuredTargetNodes = Lists.newArrayList();
         int idCounter = 0;
         for (String host : hosts) {
             try {
                 TransportAddress[] addresses = transportService.addressesFromString(host);
                 // we only limit to 1 addresses, makes no sense to ping 100 ports
                 for (int i = 0; (i < addresses.length && i < LIMIT_PORTS_COUNT); i++) {
-                    nodes.add(new DiscoveryNode("#zen_unicast_" + (++idCounter) + "#", addresses[i], version.minimumCompatibilityVersion()));
+                    configuredTargetNodes.add(new DiscoveryNode("#zen_unicast_" + (++idCounter) + "#", addresses[i], version.minimumCompatibilityVersion()));
                 }
             } catch (Exception e) {
                 throw new ElasticsearchIllegalArgumentException("Failed to resolve address for [" + host + "]", e);
             }
         }
-        this.nodes = nodes.toArray(new DiscoveryNode[nodes.size()]);
+        this.configuredTargetNodes = configuredTargetNodes.toArray(new DiscoveryNode[configuredTargetNodes.size()]);
 
         transportService.registerHandler(ACTION_NAME, new UnicastPingRequestHandler());
     }
@@ -143,6 +151,13 @@ public class UnicastZenPing extends AbstractLifecycleComponent<ZenPing> implemen
         this.nodesProvider = nodesProvider;
     }
 
+    /**
+     * Clears the list of cached ping responses.
+     */
+    public void clearTemporalReponses() {
+        temporalResponses.clear();
+    }
+
     public PingResponse[] pingAndWait(TimeValue timeout) {
         final AtomicReference<PingResponse[]> response = new AtomicReference<>();
         final CountDownLatch latch = new CountDownLatch(1);
@@ -237,18 +252,30 @@ public class UnicastZenPing extends AbstractLifecycleComponent<ZenPing> implemen
         DiscoveryNodes discoNodes = nodesProvider.nodes();
         pingRequest.pingResponse = new PingResponse(discoNodes.localNode(), discoNodes.masterNode(), clusterName);
 
-        HashSet<DiscoveryNode> nodesToPing = new HashSet<>(Arrays.asList(nodes));
+        HashSet<DiscoveryNode> nodesToPingSet = new HashSet<>();
         for (PingResponse temporalResponse : temporalResponses) {
             // Only send pings to nodes that have the same cluster name.
             if (clusterName.equals(temporalResponse.clusterName())) {
-                nodesToPing.add(temporalResponse.target());
+                nodesToPingSet.add(temporalResponse.target());
             }
         }
 
         for (UnicastHostsProvider provider : hostsProviders) {
-            nodesToPing.addAll(provider.buildDynamicNodes());
+            nodesToPingSet.addAll(provider.buildDynamicNodes());
         }
 
+        // add all possible master nodes that were active in the last known cluster configuration
+        for (ObjectCursor<DiscoveryNode> masterNode : discoNodes.getMasterNodes().values()) {
+            nodesToPingSet.add(masterNode.value);
+        }
+
+        // sort the nodes by likelihood of being an active master
+        List<DiscoveryNode> sortedNodesToPing = electMasterService.sortByMasterLikelihood(nodesToPingSet);
+
+        // now add the configured unicast targets first
+        ArrayList<DiscoveryNode> nodesToPing = Lists.newArrayList(configuredTargetNodes);
+        nodesToPing.addAll(sortedNodesToPing);
+
         final CountDownLatch latch = new CountDownLatch(nodesToPing.size());
         for (final DiscoveryNode node : nodesToPing) {
             // make sure we are connected
diff --git a/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java b/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java
index d716a336a05..1e46bbb0171 100644
--- a/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java
+++ b/src/main/java/org/elasticsearch/discovery/zen/publish/PublishClusterStateAction.java
@@ -40,6 +40,7 @@ import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
 
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 /**
  *
@@ -85,12 +86,15 @@ public class PublishClusterStateAction extends AbstractComponent {
         publish(clusterState, new AckClusterStatePublishResponseHandler(clusterState.nodes().size() - 1, ackListener));
     }
 
-    private void publish(ClusterState clusterState, final ClusterStatePublishResponseHandler publishResponseHandler) {
+    private void publish(final ClusterState clusterState, final ClusterStatePublishResponseHandler publishResponseHandler) {
 
         DiscoveryNode localNode = nodesProvider.nodes().localNode();
 
         Map<Version, BytesReference> serializedStates = Maps.newHashMap();
 
+        final AtomicBoolean timedOutWaitingForNodes = new AtomicBoolean(false);
+        final TimeValue publishTimeout = discoverySettings.getPublishTimeout();
+
         for (final DiscoveryNode node : clusterState.nodes()) {
             if (node.equals(localNode)) {
                 continue;
@@ -125,28 +129,30 @@ public class PublishClusterStateAction extends AbstractComponent {
 
                             @Override
                             public void handleResponse(TransportResponse.Empty response) {
+                                if (timedOutWaitingForNodes.get()) {
+                                    logger.debug("node {} responded for cluster state [{}] (took longer than [{}])", node, clusterState.version(), publishTimeout);
+                                }
                                 publishResponseHandler.onResponse(node);
                             }
 
                             @Override
                             public void handleException(TransportException exp) {
-                                logger.debug("failed to send cluster state to [{}]", exp, node);
+                                logger.debug("failed to send cluster state to {}", exp, node);
                                 publishResponseHandler.onFailure(node, exp);
                             }
                         });
             } catch (Throwable t) {
-                logger.debug("error sending cluster state to [{}]", t, node);
+                logger.debug("error sending cluster state to {}", t, node);
                 publishResponseHandler.onFailure(node, t);
             }
         }
 
-        TimeValue publishTimeout = discoverySettings.getPublishTimeout();
         if (publishTimeout.millis() > 0) {
             // only wait if the publish timeout is configured...
             try {
-                boolean awaited = publishResponseHandler.awaitAllNodes(publishTimeout);
-                if (!awaited) {
-                    logger.debug("awaiting all nodes to process published state {} timed out, timeout {}", clusterState.version(), publishTimeout);
+                timedOutWaitingForNodes.set(!publishResponseHandler.awaitAllNodes(publishTimeout));
+                if (timedOutWaitingForNodes.get()) {
+                    logger.debug("timed out waiting for all nodes to process published state [{}] (timeout [{}])", clusterState.version(), publishTimeout);
                 }
             } catch (InterruptedException e) {
                 // ignore & restore interrupt
diff --git a/src/main/java/org/elasticsearch/gateway/GatewayService.java b/src/main/java/org/elasticsearch/gateway/GatewayService.java
index 5f5eaa8e3e5..827a6559bf9 100644
--- a/src/main/java/org/elasticsearch/gateway/GatewayService.java
+++ b/src/main/java/org/elasticsearch/gateway/GatewayService.java
@@ -35,7 +35,6 @@ import org.elasticsearch.common.component.AbstractLifecycleComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.discovery.Discovery;
 import org.elasticsearch.discovery.DiscoveryService;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.threadpool.ThreadPool;
@@ -134,12 +133,6 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
         if (lifecycle.stoppedOrClosed()) {
             return;
         }
-        if (event.state().blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) {
-            // we need to clear those flags, since we might need to recover again in case we disconnect
-            // from the cluster and then reconnect
-            recovered.set(false);
-            scheduledRecovery.set(false);
-        }
         if (event.localNodeMaster() && event.state().blocks().hasGlobalBlock(STATE_NOT_RECOVERED_BLOCK)) {
             checkStateMeetsSettingsAndMaybeRecover(event.state(), true);
         }
@@ -147,7 +140,7 @@ public class GatewayService extends AbstractLifecycleComponent<GatewayService> i
 
     protected void checkStateMeetsSettingsAndMaybeRecover(ClusterState state, boolean asyncRecovery) {
         DiscoveryNodes nodes = state.nodes();
-        if (state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK)) {
+        if (state.blocks().hasGlobalBlock(discoveryService.getNoMasterBlock())) {
             logger.debug("not recovering from gateway, no master elected yet");
         } else if (recoverAfterNodes != -1 && (nodes.masterAndDataNodes().size()) < recoverAfterNodes) {
             logger.debug("not recovering from gateway, nodes_size (data+master) [" + nodes.masterAndDataNodes().size() + "] < recover_after_nodes [" + recoverAfterNodes + "]");
diff --git a/src/main/java/org/elasticsearch/indices/store/IndicesStore.java b/src/main/java/org/elasticsearch/indices/store/IndicesStore.java
index 02420d0e3d5..ecf5e6b6b22 100644
--- a/src/main/java/org/elasticsearch/indices/store/IndicesStore.java
+++ b/src/main/java/org/elasticsearch/indices/store/IndicesStore.java
@@ -307,7 +307,7 @@ public class IndicesStore extends AbstractComponent implements ClusterStateListe
                 return;
             }
 
-            clusterService.submitStateUpdateTask("indices_store", new ClusterStateUpdateTask() {
+            clusterService.submitStateUpdateTask("indices_store", new ClusterStateNonMasterUpdateTask() {
                 @Override
                 public ClusterState execute(ClusterState currentState) throws Exception {
                     if (clusterState.getVersion() != currentState.getVersion()) {
diff --git a/src/main/java/org/elasticsearch/transport/TransportService.java b/src/main/java/org/elasticsearch/transport/TransportService.java
index e922f1b4932..e2e6f502e89 100644
--- a/src/main/java/org/elasticsearch/transport/TransportService.java
+++ b/src/main/java/org/elasticsearch/transport/TransportService.java
@@ -245,6 +245,10 @@ public class TransportService extends AbstractLifecycleComponent<TransportServic
         }
     }
 
+    protected TransportRequestHandler getHandler(String action) { // protected: lookup hook for subclasses and same-package code
+        return serverHandlers.get(action); // entry of serverHandlers for this action name (NOTE(review): presumably null when absent - confirm map type)
+    }
+
     class Adapter implements TransportServiceAdapter {
 
         final MeanMetric rxMetric = new MeanMetric();
diff --git a/src/main/java/org/elasticsearch/transport/local/LocalTransport.java b/src/main/java/org/elasticsearch/transport/local/LocalTransport.java
index 142f33f9d07..627f37c61a4 100644
--- a/src/main/java/org/elasticsearch/transport/local/LocalTransport.java
+++ b/src/main/java/org/elasticsearch/transport/local/LocalTransport.java
@@ -33,6 +33,7 @@ import org.elasticsearch.common.transport.BoundTransportAddress;
 import org.elasticsearch.common.transport.LocalTransportAddress;
 import org.elasticsearch.common.transport.TransportAddress;
 import org.elasticsearch.common.util.concurrent.AbstractRunnable;
+import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
 import org.elasticsearch.transport.support.TransportStatus;
@@ -40,6 +41,8 @@ import org.elasticsearch.transport.support.TransportStatus;
 import java.io.IOException;
 import java.util.Map;
 import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
 import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.newConcurrentMap;
@@ -50,6 +53,7 @@ import static org.elasticsearch.common.util.concurrent.ConcurrentCollections.new
 public class LocalTransport extends AbstractLifecycleComponent<Transport> implements Transport {
 
     private final ThreadPool threadPool;
+    private final ThreadPoolExecutor workers;
     private final Version version;
     private volatile TransportServiceAdapter transportServiceAdapter;
     private volatile BoundTransportAddress boundAddress;
@@ -58,13 +62,20 @@ public class LocalTransport extends AbstractLifecycleComponent<Transport> implem
     private static final AtomicLong transportAddressIdGenerator = new AtomicLong();
     private final ConcurrentMap<DiscoveryNode, LocalTransport> connectedNodes = newConcurrentMap();
 
-    public static final String TRANSPORT_LOCAL_ADDRESS = "transport.local_address";
+    public static final String TRANSPORT_LOCAL_ADDRESS = "transport.local.address";
+    public static final String TRANSPORT_LOCAL_WORKERS = "transport.local.workers";
+    public static final String TRANSPORT_LOCAL_QUEUE = "transport.local.queue";
 
     @Inject
     public LocalTransport(Settings settings, ThreadPool threadPool, Version version) {
         super(settings);
         this.threadPool = threadPool;
         this.version = version;
+        // Dedicated executor for this transport; size and queue come from transport.local.workers / transport.local.queue.
+        int workerCount = this.settings.getAsInt(TRANSPORT_LOCAL_WORKERS, EsExecutors.boundedNumberOfProcessors(settings));
+        int queueSize = this.settings.getAsInt(TRANSPORT_LOCAL_QUEUE, -1); // NOTE(review): -1 presumably means an unbounded queue - confirm in EsExecutors.newFixed
+        logger.debug("creating [{}] workers, queue_size [{}]", workerCount, queueSize);
+        this.workers = EsExecutors.newFixed(workerCount, queueSize, EsExecutors.daemonThreadFactory(this.settings, "local_transport"));
     }
 
     @Override
@@ -106,6 +117,13 @@ public class LocalTransport extends AbstractLifecycleComponent<Transport> implem
 
     @Override
     protected void doClose() throws ElasticsearchException {
+        workers.shutdown(); // orderly shutdown: no new tasks accepted, already submitted ones may still run
+        try {
+            workers.awaitTermination(10, TimeUnit.SECONDS); // bounded wait so close cannot block indefinitely
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt(); // restore the interrupt flag for the caller
+        }
+        workers.shutdownNow(); // after the grace period (or an interrupt), attempt to stop whatever is still running
     }
 
     @Override
@@ -185,7 +203,7 @@ public class LocalTransport extends AbstractLifecycleComponent<Transport> implem
 
         transportServiceAdapter.sent(data.length);
 
-        threadPool.generic().execute(new Runnable() {
+        targetTransport.workers().execute(new Runnable() {
             @Override
             public void run() {
                 targetTransport.messageReceived(data, action, LocalTransport.this, version, requestId);
@@ -193,8 +211,8 @@ public class LocalTransport extends AbstractLifecycleComponent<Transport> implem
         });
     }
 
-    ThreadPool threadPool() {
-        return this.threadPool;
+    ThreadPoolExecutor workers() { // package-private accessor for this transport's worker executor
+        return this.workers;
     }
 
     protected void messageReceived(byte[] data, String action, LocalTransport sourceTransport, Version version, @Nullable final Long sendRequestId) {
diff --git a/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java b/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java
index f4d5e83053a..f316e9ba69d 100644
--- a/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java
+++ b/src/main/java/org/elasticsearch/transport/local/LocalTransportChannel.java
@@ -72,7 +72,7 @@ public class LocalTransportChannel implements TransportChannel {
         response.writeTo(stream);
         stream.close();
         final byte[] data = bStream.bytes().toBytes();
-        targetTransport.threadPool().generic().execute(new Runnable() {
+        targetTransport.workers().execute(new Runnable() {
             @Override
             public void run() {
                 targetTransport.messageReceived(data, action, sourceTransport, version, null);
@@ -98,7 +98,7 @@ public class LocalTransportChannel implements TransportChannel {
             too.close();
         }
         final byte[] data = stream.bytes().toBytes();
-        targetTransport.threadPool().generic().execute(new Runnable() {
+        targetTransport.workers().execute(new Runnable() {
             @Override
             public void run() {
                 targetTransport.messageReceived(data, action, sourceTransport, version, null);
diff --git a/src/main/java/org/elasticsearch/tribe/TribeService.java b/src/main/java/org/elasticsearch/tribe/TribeService.java
index e706e400658..a335f47b53c 100644
--- a/src/main/java/org/elasticsearch/tribe/TribeService.java
+++ b/src/main/java/org/elasticsearch/tribe/TribeService.java
@@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import org.elasticsearch.ElasticsearchException;
-import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.action.support.master.TransportMasterNodeReadOperationAction;
 import org.elasticsearch.cluster.*;
 import org.elasticsearch.cluster.block.ClusterBlock;
@@ -43,7 +42,7 @@ import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
-import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.discovery.DiscoveryService;
 import org.elasticsearch.gateway.GatewayService;
 import org.elasticsearch.node.NodeBuilder;
 import org.elasticsearch.node.internal.InternalNode;
@@ -53,7 +52,6 @@ import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.concurrent.CountDownLatch;
 
 /**
  * The tribe service holds a list of node clients connected to a list of tribe members, and uses their
@@ -121,7 +119,7 @@ public class TribeService extends AbstractLifecycleComponent<TribeService> {
     private final List<InternalNode> nodes = Lists.newCopyOnWriteArrayList();
 
     @Inject
-    public TribeService(Settings settings, ClusterService clusterService) {
+    public TribeService(Settings settings, ClusterService clusterService, DiscoveryService discoveryService) {
         super(settings);
         this.clusterService = clusterService;
         Map<String, Settings> nodesSettings = Maps.newHashMap(settings.getGroups("tribe", true));
@@ -143,7 +141,7 @@ public class TribeService extends AbstractLifecycleComponent<TribeService> {
         if (!nodes.isEmpty()) {
             // remove the initial election / recovery blocks since we are not going to have a
             // master elected in this single tribe  node local "cluster"
-            clusterService.removeInitialStateBlock(Discovery.NO_MASTER_BLOCK);
+            clusterService.removeInitialStateBlock(discoveryService.getNoMasterBlock());
             clusterService.removeInitialStateBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK);
             if (settings.getAsBoolean("tribe.blocks.write", false)) {
                 clusterService.addInitialStateBlock(TRIBE_WRITE_BLOCK);
@@ -222,7 +220,7 @@ public class TribeService extends AbstractLifecycleComponent<TribeService> {
         @Override
         public void clusterChanged(final ClusterChangedEvent event) {
             logger.debug("[{}] received cluster event, [{}]", tribeName, event.source());
-            clusterService.submitStateUpdateTask("cluster event from " + tribeName + ", " + event.source(), new ClusterStateUpdateTask() {
+            clusterService.submitStateUpdateTask("cluster event from " + tribeName + ", " + event.source(), new ClusterStateNonMasterUpdateTask() {
                 @Override
                 public ClusterState execute(ClusterState currentState) throws Exception {
                     ClusterState tribeState = event.state();
diff --git a/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java b/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java
index dde9eedc4e1..1d0a2038615 100644
--- a/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java
+++ b/src/test/java/org/elasticsearch/cluster/ClusterServiceTests.java
@@ -19,6 +19,7 @@
 package org.elasticsearch.cluster;
 
 import com.google.common.base.Predicate;
+import com.google.common.util.concurrent.ListenableFuture;
 import org.elasticsearch.ElasticsearchException;
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
 import org.elasticsearch.action.admin.cluster.tasks.PendingClusterTasksResponse;
@@ -256,6 +257,58 @@ public class ClusterServiceTests extends ElasticsearchIntegrationTest {
         assertThat(processedLatch.await(1, TimeUnit.SECONDS), equalTo(true));
     }
 
+    @Test
+    public void testMasterAwareExecution() throws Exception {
+        Settings settings = settingsBuilder()
+                .put("discovery.type", "local")
+                .build();
+        // Start two nodes: one with the base settings and one with node.master=false; tasks are submitted on the latter.
+        ListenableFuture<String> master = internalCluster().startNodeAsync(settings);
+        ListenableFuture<String> nonMaster = internalCluster().startNodeAsync(settingsBuilder().put(settings).put("node.master", false).build());
+        master.get();
+        ensureGreen(); // make sure we have a cluster
+
+        ClusterService clusterService = internalCluster().getInstance(ClusterService.class, nonMaster.get());
+        // Part 1: a plain ClusterStateUpdateTask submitted on the non-master node must end up in onFailure.
+        final boolean[] taskFailed = {false}; // one-element array so the anonymous classes below can mutate it
+        final CountDownLatch latch1 = new CountDownLatch(1);
+        clusterService.submitStateUpdateTask("test", new ClusterStateUpdateTask() {
+            @Override
+            public ClusterState execute(ClusterState currentState) throws Exception {
+                latch1.countDown(); // releases the test without setting taskFailed, so the assertion below fails
+                return currentState;
+            }
+
+            @Override
+            public void onFailure(String source, Throwable t) {
+                taskFailed[0] = true;
+                latch1.countDown();
+            }
+        });
+
+        latch1.await();
+        assertTrue("cluster state update task was executed on a non-master", taskFailed[0]);
+        // Part 2: a ClusterStateNonMasterUpdateTask must actually execute on the non-master node.
+        taskFailed[0] = true; // start from "failed" so that only execute() running can clear it
+        final CountDownLatch latch2 = new CountDownLatch(1);
+        clusterService.submitStateUpdateTask("test", new ClusterStateNonMasterUpdateTask() {
+            @Override
+            public ClusterState execute(ClusterState currentState) throws Exception {
+                taskFailed[0] = false;
+                latch2.countDown();
+                return currentState;
+            }
+
+            @Override
+            public void onFailure(String source, Throwable t) {
+                taskFailed[0] = true;
+                latch2.countDown();
+            }
+        });
+        latch2.await();
+        assertFalse("non-master cluster state update task was not executed", taskFailed[0]);
+    }
+
     @Test
     public void testAckedUpdateTaskNoAckExpected() throws Exception {
         Settings settings = settingsBuilder()
@@ -655,7 +708,7 @@ public class ClusterServiceTests extends ElasticsearchIntegrationTest {
         }
     }
 
-    private static class BlockingTask implements ClusterStateUpdateTask {
+    private static class BlockingTask extends ClusterStateUpdateTask {
         private final CountDownLatch latch = new CountDownLatch(1);
 
         @Override
@@ -674,7 +727,7 @@ public class ClusterServiceTests extends ElasticsearchIntegrationTest {
 
     }
 
-    private static class PrioritiezedTask implements ClusterStateUpdateTask {
+    private static class PrioritiezedTask extends ClusterStateUpdateTask {
 
         private final Priority priority;
         private final CountDownLatch latch;
diff --git a/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java b/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java
index 3fe477cc989..5e63990fe04 100644
--- a/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java
+++ b/src/test/java/org/elasticsearch/cluster/MinimumMasterNodesTests.java
@@ -25,7 +25,7 @@ import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.client.Client;
 import org.elasticsearch.common.Priority;
 import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.discovery.DiscoverySettings;
 import org.elasticsearch.discovery.zen.elect.ElectMasterService;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
@@ -60,7 +60,7 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
 
         logger.info("--> should be blocked, no master...");
         ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true));
         assertThat(state.nodes().size(), equalTo(1)); // verify that we still see the local node in the cluster state
 
         logger.info("--> start second node, cluster should be formed");
@@ -70,9 +70,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
         assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
 
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false));
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false));
 
         state = client().admin().cluster().prepareState().execute().actionGet().getState();
         assertThat(state.nodes().size(), equalTo(2));
@@ -98,11 +98,11 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
         awaitBusy(new Predicate<Object>() {
             public boolean apply(Object obj) {
                 ClusterState  state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-                return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK);
+                return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
             }
         });
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true));
         assertThat(state.nodes().size(), equalTo(1)); // verify that we still see the local node in the cluster state
 
         logger.info("--> starting the previous master node again...");
@@ -112,9 +112,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
         assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
 
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false));
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false));
 
         state = client().admin().cluster().prepareState().execute().actionGet().getState();
         assertThat(state.nodes().size(), equalTo(2));
@@ -135,7 +135,7 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
         assertThat(awaitBusy(new Predicate<Object>() {
             public boolean apply(Object obj) {
                 ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-                return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK);
+                return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
             }
         }), equalTo(true));
 
@@ -146,9 +146,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
         assertThat(clusterHealthResponse.isTimedOut(), equalTo(false));
 
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false));
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(false));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(false));
 
         state = client().admin().cluster().prepareState().execute().actionGet().getState();
         assertThat(state.nodes().size(), equalTo(2));
@@ -183,21 +183,21 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
         awaitBusy(new Predicate<Object>() {
             public boolean apply(Object obj) {
                 ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-                return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK);
+                return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
             }
         });
         
         awaitBusy(new Predicate<Object>() {
             public boolean apply(Object obj) {
                 ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-                return state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK);
+                return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
             }
         });
 
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true));
         state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-        assertThat(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK), equalTo(true));
+        assertThat(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID), equalTo(true));
 
         logger.info("--> start two more nodes");
         internalCluster().startNode(settings);
@@ -298,9 +298,9 @@ public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
                 boolean success = true;
                 for (Client client : internalCluster()) {
                     ClusterState state = client.admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-                    success &= state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK);
+                    success &= state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
                     if (logger.isDebugEnabled()) {
-                        logger.debug("Checking for NO_MASTER_BLOCK on client: {} NO_MASTER_BLOCK: [{}]", client, state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK));
+                        logger.debug("Checking for NO_MASTER_BLOCK on client: {} NO_MASTER_BLOCK: [{}]", client, state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID));
                     }
                 }
                 return success;
diff --git a/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java b/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java
index a9464abc7cd..95dbbf652ab 100644
--- a/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java
+++ b/src/test/java/org/elasticsearch/cluster/NoMasterNodeTests.java
@@ -19,14 +19,20 @@
 
 package org.elasticsearch.cluster;
 
+import com.google.common.base.Predicate;
 import org.elasticsearch.action.ActionRequestBuilder;
+import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
 import org.elasticsearch.action.bulk.BulkRequestBuilder;
+import org.elasticsearch.action.count.CountResponse;
+import org.elasticsearch.action.get.GetResponse;
 import org.elasticsearch.action.percolate.PercolateSourceBuilder;
+import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.cluster.block.ClusterBlockException;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.xcontent.XContentFactory;
-import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.discovery.DiscoverySettings;
 import org.elasticsearch.discovery.MasterNotDiscoveredException;
 import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.script.ScriptService;
@@ -40,7 +46,7 @@ import java.util.HashMap;
 import static org.elasticsearch.action.percolate.PercolateSourceBuilder.docBuilder;
 import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
 import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
 import static org.hamcrest.Matchers.*;
 
 /**
@@ -61,6 +67,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest {
                 .put("discovery.zen.minimum_master_nodes", 2)
                 .put("discovery.zen.ping_timeout", "200ms")
                 .put("discovery.initial_state_timeout", "500ms")
+                .put(DiscoverySettings.NO_MASTER_BLOCK, "all")
                 .build();
 
         TimeValue timeout = TimeValue.timeValueMillis(200);
@@ -75,7 +82,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest {
             @Override
             public void run() {
                 ClusterState state = client().admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-                assertTrue(state.blocks().hasGlobalBlock(Discovery.NO_MASTER_BLOCK));
+                assertTrue(state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID));
             }
         });
 
@@ -128,7 +135,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest {
                 ClusterBlockException.class, RestStatus.SERVICE_UNAVAILABLE
         );
 
-        checkWriteAction(autoCreateIndex, timeout,
+        checkWriteAction(false, timeout,
                 client().prepareUpdate("test", "type1", "1").setScript("test script", ScriptService.ScriptType.INLINE).setTimeout(timeout));
 
 
@@ -136,7 +143,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest {
                 client().prepareUpdate("no_index", "type1", "1").setScript("test script", ScriptService.ScriptType.INLINE).setTimeout(timeout));
 
 
-        checkWriteAction(autoCreateIndex, timeout,
+        checkWriteAction(false, timeout,
                 client().prepareIndex("test", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout));
 
         checkWriteAction(autoCreateIndex, timeout,
@@ -145,9 +152,7 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest {
         BulkRequestBuilder bulkRequestBuilder = client().prepareBulk();
         bulkRequestBuilder.add(client().prepareIndex("test", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject()));
         bulkRequestBuilder.add(client().prepareIndex("test", "type1", "2").setSource(XContentFactory.jsonBuilder().startObject().endObject()));
-        // today, we clear the metadata on when there is no master, so it will go through the auto create logic and
-        // add it... (if autoCreate is set to true)
-        checkBulkAction(autoCreateIndex, bulkRequestBuilder);
+        checkBulkAction(false, bulkRequestBuilder);
 
         bulkRequestBuilder = client().prepareBulk();
         bulkRequestBuilder.add(client().prepareIndex("no_index", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject()));
@@ -203,4 +208,75 @@ public class NoMasterNodeTests extends ElasticsearchIntegrationTest {
             }
         }
     }
+
+    @Test
+    public void testNoMasterActions_writeMasterBlock() throws Exception {
+        Settings settings = settingsBuilder()
+                .put("discovery.type", "zen")
+                .put("action.auto_create_index", false)
+                .put("discovery.zen.minimum_master_nodes", 2)
+                .put("discovery.zen.ping_timeout", "200ms")
+                .put("discovery.initial_state_timeout", "500ms")
+                .put(DiscoverySettings.NO_MASTER_BLOCK, "write") // this test exercises the "write" variant of the no-master block
+                .build();
+
+        internalCluster().startNode(settings);
+        // start a second node, create and populate two indices, then stop one node so that the no-master block gets applied
+        internalCluster().startNode(settings);
+        prepareCreate("test1").setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).get();
+        prepareCreate("test2").setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 2, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).get();
+        client().admin().cluster().prepareHealth("_all").setWaitForGreenStatus().get();
+        client().prepareIndex("test1", "type1", "1").setSource("field", "value1").get();
+        client().prepareIndex("test2", "type1", "1").setSource("field", "value1").get();
+        refresh();
+
+        ensureSearchable("test1", "test2");
+
+        ClusterStateResponse clusterState = client().admin().cluster().prepareState().get();
+        logger.info("Cluster state:\n" + clusterState.getState().prettyPrint());
+
+        internalCluster().stopRandomDataNode(); // with minimum_master_nodes=2 the remaining node is expected to lose its master
+        assertThat(awaitBusy(new Predicate<Object>() {
+            public boolean apply(Object o) {
+                ClusterState state = client().admin().cluster().prepareState().setLocal(true).get().getState();
+                return state.blocks().hasGlobalBlock(DiscoverySettings.NO_MASTER_BLOCK_ID);
+            }
+        }), equalTo(true));
+
+        // Reads (get / count / search) are expected to keep working while the "write" no-master block is in place.
+        GetResponse getResponse = client().prepareGet("test1", "type1", "1").get();
+        assertExists(getResponse);
+
+        CountResponse countResponse = client().prepareCount("test1").get();
+        assertHitCount(countResponse, 1l);
+
+        SearchResponse searchResponse = client().prepareSearch("test1").get();
+        assertHitCount(searchResponse, 1l);
+
+        countResponse = client().prepareCount("test2").get();
+        assertThat(countResponse.getTotalShards(), equalTo(2));
+        assertThat(countResponse.getSuccessfulShards(), equalTo(1)); // test2 has 2 shards and 0 replicas; presumably one shard lived on the stopped node
+
+        TimeValue timeout = TimeValue.timeValueMillis(200);
+        long now = System.currentTimeMillis();
+        try {
+            client().prepareUpdate("test1", "type1", "1").setDoc("field", "value2").setTimeout(timeout).get();
+            fail("Expected ClusterBlockException");
+        } catch (ClusterBlockException e) {
+            assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50)); // the write should have waited roughly the full timeout (50ms slack) before failing
+            assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE));
+        }
+
+        now = System.currentTimeMillis();
+        try {
+            client().prepareIndex("test1", "type1", "1").setSource(XContentFactory.jsonBuilder().startObject().endObject()).setTimeout(timeout).get();
+            fail("Expected ClusterBlockException");
+        } catch (ClusterBlockException e) {
+            assertThat(System.currentTimeMillis() - now, greaterThan(timeout.millis() - 50)); // same timing expectation as for the update above
+            assertThat(e.status(), equalTo(RestStatus.SERVICE_UNAVAILABLE));
+        }
+
+        internalCluster().startNode(settings); // bring a second node back and wait for a green two-node cluster before finishing
+        client().admin().cluster().prepareHealth().setWaitForGreenStatus().setWaitForNodes("2").get();
+    }
 }
diff --git a/src/test/java/org/elasticsearch/discovery/ClusterDiscoveryConfiguration.java b/src/test/java/org/elasticsearch/discovery/ClusterDiscoveryConfiguration.java
new file mode 100644
index 00000000000..ded26ddc305
--- /dev/null
+++ b/src/test/java/org/elasticsearch/discovery/ClusterDiscoveryConfiguration.java
@@ -0,0 +1,206 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.discovery;
+
+import com.carrotsearch.randomizedtesting.RandomizedTest;
+import com.google.common.primitives.Ints;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.elasticsearch.test.InternalTestCluster;
+import org.elasticsearch.test.SettingsSource;
+import org.elasticsearch.transport.local.LocalTransport;
+
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Source of node and transport-client settings for discovery integration tests.
+ * This base flavour gives every node the same settings: {@link #DEFAULT_SETTINGS}
+ * with the extra settings passed at construction applied on top.
+ */
+public class ClusterDiscoveryConfiguration extends SettingsSource {
+
+    /** Settings every configuration starts from: local gateway and zen discovery. */
+    public static final Settings DEFAULT_SETTINGS = ImmutableSettings.settingsBuilder()
+            .put("gateway.type", "local")
+            .put("discovery.type", "zen")
+            .build();
+
+    /** Number of nodes this configuration was created for. */
+    final int numOfNodes;
+
+    /** {@link #DEFAULT_SETTINGS} followed by the extra settings given to the constructor. */
+    final Settings baseSettings;
+
+    /**
+     * Creates a configuration that uses only {@link #DEFAULT_SETTINGS}.
+     *
+     * @param numOfNodes number of nodes in the cluster
+     */
+    public ClusterDiscoveryConfiguration(int numOfNodes) {
+        this(numOfNodes, ImmutableSettings.EMPTY);
+    }
+
+    /**
+     * Creates a configuration with additional settings.
+     *
+     * @param numOfNodes    number of nodes in the cluster
+     * @param extraSettings settings applied after {@link #DEFAULT_SETTINGS}
+     */
+    public ClusterDiscoveryConfiguration(int numOfNodes, Settings extraSettings) {
+        this.numOfNodes = numOfNodes;
+        this.baseSettings = ImmutableSettings.builder().put(DEFAULT_SETTINGS).put(extraSettings).build();
+    }
+
+    /** Returns the shared base settings; the node ordinal is ignored here. */
+    @Override
+    public Settings node(int nodeOrdinal) {
+        return baseSettings;
+    }
+
+    /** Returns the shared base settings for transport clients. */
+    @Override
+    public Settings transportClient() {
+        return baseSettings;
+    }
+
+    /**
+     * Configuration that disables multicast pinging and points every node at an explicit
+     * list of unicast hosts, identified by node ordinal.
+     */
+    public static class UnicastZen extends ClusterDiscoveryConfiguration {
+
+        /** Incremented once per instance so that each configuration computes a different base port. */
+        private static final AtomicInteger portRangeCounter = new AtomicInteger();
+
+        /** Ordinals of the nodes published as {@code discovery.zen.ping.unicast.hosts}. */
+        private final int[] unicastHostOrdinals;
+        /** Port offset base; outside local mode node {@code n} gets transport port {@code basePort + n}. */
+        private final int basePort;
+
+        /** Creates a configuration in which every node is a unicast host. */
+        public UnicastZen(int numOfNodes) {
+            this(numOfNodes, numOfNodes);
+        }
+
+        /** Creates a configuration in which every node is a unicast host, with extra settings. */
+        public UnicastZen(int numOfNodes, Settings extraSettings) {
+            this(numOfNodes, numOfNodes, extraSettings);
+        }
+
+        /** Creates a configuration with {@code numOfUnicastHosts} unicast hosts and no extra settings. */
+        public UnicastZen(int numOfNodes, int numOfUnicastHosts) {
+            this(numOfNodes, numOfUnicastHosts, ImmutableSettings.EMPTY);
+        }
+
+        /**
+         * Creates a configuration with {@code numOfUnicastHosts} unicast hosts. When that equals
+         * {@code numOfNodes} all ordinals are used in order, otherwise distinct ordinals are drawn at random.
+         *
+         * @param numOfNodes        number of nodes in the cluster
+         * @param numOfUnicastHosts how many nodes to list as unicast hosts
+         * @param extraSettings     settings applied after {@link #DEFAULT_SETTINGS}
+         * @throws IllegalArgumentException if {@code numOfUnicastHosts} is negative or exceeds {@code numOfNodes}
+         */
+        public UnicastZen(int numOfNodes, int numOfUnicastHosts, Settings extraSettings) {
+            super(numOfNodes, extraSettings);
+            if (numOfUnicastHosts < 0 || numOfUnicastHosts > numOfNodes) {
+                // only numOfNodes distinct ordinals exist, so the random draw below could never finish
+                throw new IllegalArgumentException("numOfUnicastHosts [" + numOfUnicastHosts
+                        + "] must be between 0 and numOfNodes [" + numOfNodes + "]");
+            }
+            if (numOfUnicastHosts == numOfNodes) {
+                unicastHostOrdinals = new int[numOfNodes];
+                for (int i = 0; i < numOfNodes; i++) {
+                    unicastHostOrdinals[i] = i;
+                }
+            } else {
+                Set<Integer> ordinals = new HashSet<>(numOfUnicastHosts);
+                while (ordinals.size() != numOfUnicastHosts) {
+                    ordinals.add(RandomizedTest.randomInt(numOfNodes - 1));
+                }
+                unicastHostOrdinals = Ints.toArray(ordinals);
+            }
+            this.basePort = calcBasePort();
+        }
+
+        /** Creates a configuration with an explicit list of unicast host ordinals and no extra settings. */
+        public UnicastZen(int numOfNodes, int[] unicastHostOrdinals) {
+            this(numOfNodes, ImmutableSettings.EMPTY, unicastHostOrdinals);
+        }
+
+        /**
+         * Creates a configuration with an explicit list of unicast host ordinals.
+         *
+         * @param numOfNodes          number of nodes in the cluster
+         * @param extraSettings       settings applied after {@link #DEFAULT_SETTINGS}
+         * @param unicastHostOrdinals ordinals of the nodes to list as unicast hosts; copied, must not be null
+         */
+        public UnicastZen(int numOfNodes, Settings extraSettings, int[] unicastHostOrdinals) {
+            super(numOfNodes, extraSettings);
+            // defensive copy: later changes to the caller's array must not alter this configuration
+            this.unicastHostOrdinals = unicastHostOrdinals.clone();
+            this.basePort = calcBasePort();
+        }
+
+        /**
+         * Computes the base port for a new instance: 10000, plus 1000 times the child JVM id
+         * (modulo 60), plus 100 times the number of instances created so far in this JVM.
+         * NOTE(review): nothing bounds the per-JVM counter, so after about ten instances the range
+         * overlaps the next JVM's block, and large operands yield ports above 65535 - confirm acceptable.
+         */
+        private static int calcBasePort() {
+            return 10000 +
+                    1000 * (ElasticsearchIntegrationTest.CHILD_JVM_ID % 60) + // up to 60 jvms
+                    100 * portRangeCounter.incrementAndGet(); // up to 100 nodes
+        }
+
+        /**
+         * Builds the settings for one node: multicast pinging disabled, the node's own transport
+         * address pinned, and the unicast host list set. In {@code local} node mode addresses have
+         * the form {@code node_<ordinal>}; otherwise nodes use {@code localhost} and port
+         * {@code basePort + ordinal}. The base settings are put last.
+         */
+        @Override
+        public Settings node(int nodeOrdinal) {
+            ImmutableSettings.Builder builder = ImmutableSettings.builder()
+                    .put("discovery.zen.ping.multicast.enabled", false);
+
+            String[] unicastHosts = new String[unicastHostOrdinals.length];
+            String mode = baseSettings.get("node.mode", InternalTestCluster.NODE_MODE);
+            if (mode.equals("local")) {
+                builder.put(LocalTransport.TRANSPORT_LOCAL_ADDRESS, "node_" + nodeOrdinal);
+                for (int i = 0; i < unicastHosts.length; i++) {
+                    unicastHosts[i] = "node_" + unicastHostOrdinals[i];
+                }
+            } else {
+                // we need to pin the node port & host so we'd know where to point things
+                builder.put("transport.tcp.port", basePort + nodeOrdinal);
+                builder.put("transport.host", "localhost");
+                for (int i = 0; i < unicastHosts.length; i++) {
+                    unicastHosts[i] = "localhost:" + (basePort + unicastHostOrdinals[i]);
+                }
+            }
+            builder.putArray("discovery.zen.ping.unicast.hosts", unicastHosts);
+            return builder.put(super.node(nodeOrdinal)).build();
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/discovery/DiscoveryWithNetworkFailuresTests.java b/src/test/java/org/elasticsearch/discovery/DiscoveryWithNetworkFailuresTests.java
deleted file mode 100644
index d2987f77ad0..00000000000
--- a/src/test/java/org/elasticsearch/discovery/DiscoveryWithNetworkFailuresTests.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed to Elasticsearch under one or more contributor
- * license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright
- * ownership. Elasticsearch licenses this file to you under
- * the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.discovery;
-
-import com.google.common.base.Predicate;
-import org.apache.lucene.util.LuceneTestCase;
-import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
-import org.elasticsearch.client.Client;
-import org.elasticsearch.cluster.ClusterState;
-import org.elasticsearch.cluster.node.DiscoveryNode;
-import org.elasticsearch.cluster.node.DiscoveryNodes;
-import org.elasticsearch.common.Priority;
-import org.elasticsearch.common.settings.ImmutableSettings;
-import org.elasticsearch.common.settings.Settings;
-import org.elasticsearch.test.ElasticsearchIntegrationTest;
-import org.elasticsearch.test.transport.MockTransportService;
-import org.elasticsearch.transport.TransportModule;
-import org.elasticsearch.transport.TransportService;
-import org.junit.Test;
-
-import java.util.Arrays;
-import java.util.List;
-
-import static org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
-import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
-import static org.hamcrest.Matchers.*;
-
-/**
- */
-@ClusterScope(scope= Scope.SUITE, numDataNodes =0)
-public class DiscoveryWithNetworkFailuresTests extends ElasticsearchIntegrationTest {
-
-    @Test
-    @LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/2488")
-    public void failWithMinimumMasterNodesConfigured() throws Exception {
-        final Settings settings = ImmutableSettings.settingsBuilder()
-                .put("discovery.zen.minimum_master_nodes", 2)
-                .put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly
-                .put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName())
-                .build();
-        List<String>nodes = internalCluster().startNodesAsync(3, settings).get();
-
-        // Wait until a green status has been reaches and 3 nodes are part of the cluster
-        List<String> nodesList = Arrays.asList(nodes.toArray(new String[3]));
-        ClusterHealthResponse clusterHealthResponse = client().admin().cluster().prepareHealth()
-                .setWaitForEvents(Priority.LANGUID)
-                .setWaitForNodes("3")
-                .get();
-        assertThat(clusterHealthResponse.isTimedOut(), is(false));
-
-        // Figure out what is the elected master node
-        DiscoveryNode masterDiscoNode = null;
-        for (String node : nodesList) {
-            ClusterState state = internalCluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-            assertThat(state.nodes().size(), equalTo(3));
-            if (masterDiscoNode == null) {
-                masterDiscoNode = state.nodes().masterNode();
-            } else {
-                assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode));
-            }
-        }
-        assert masterDiscoNode != null;
-        logger.info("---> legit elected master node=" + masterDiscoNode);
-        final Client masterClient = internalCluster().masterClient();
-
-        // Everything is stable now, it is now time to simulate evil...
-
-        // Pick a node that isn't the elected master.
-        String unluckyNode = null;
-        for (String node : nodesList) {
-            if (!node.equals(masterDiscoNode.getName())) {
-                unluckyNode = node;
-            }
-        }
-        assert unluckyNode != null;
-
-        // Simulate a network issue between the unlucky node and elected master node in both directions.
-        addFailToSendNoConnectRule(masterDiscoNode.getName(), unluckyNode);
-        addFailToSendNoConnectRule(unluckyNode, masterDiscoNode.getName());
-        try {
-            // Wait until elected master has removed that the unlucky node...
-            awaitBusy(new Predicate<Object>() {
-                @Override
-                public boolean apply(Object input) {
-                    return masterClient.admin().cluster().prepareState().setLocal(true).get().getState().nodes().size() == 2;
-                }
-            });
-
-            // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
-            // continuously ping until network failures have been resolved.
-            Client isolatedNodeClient = internalCluster().client(unluckyNode);
-            ClusterState localClusterState = isolatedNodeClient.admin().cluster().prepareState().setLocal(true).get().getState();
-            DiscoveryNodes localDiscoveryNodes = localClusterState.nodes();
-            assertThat(localDiscoveryNodes.masterNode(), nullValue());
-        } finally {
-            // stop simulating network failures, from this point on the unlucky node is able to rejoin
-            // We also need to do this even if assertions fail, since otherwise the test framework can't work properly
-            clearNoConnectRule(masterDiscoNode.getName(), unluckyNode);
-            clearNoConnectRule(unluckyNode, masterDiscoNode.getName());
-        }
-
-        // Wait until the master node sees all 3 nodes again.
-        clusterHealthResponse = masterClient.admin().cluster().prepareHealth()
-                .setWaitForEvents(Priority.LANGUID)
-                .setWaitForNodes("3")
-                .get();
-        assertThat(clusterHealthResponse.isTimedOut(), is(false));
-
-        for (String node : nodesList) {
-            ClusterState state = internalCluster().client(node).admin().cluster().prepareState().setLocal(true).execute().actionGet().getState();
-            assertThat(state.nodes().size(), equalTo(3));
-            // The elected master shouldn't have changed, since the unlucky node never could have elected himself as
-            // master since m_m_n of 2 could never be satisfied.
-            assertThat(state.nodes().masterNode(), equalTo(masterDiscoNode));
-        }
-    }
-
-    private void addFailToSendNoConnectRule(String fromNode, String toNode) {
-        TransportService mockTransportService = internalCluster().getInstance(TransportService.class, fromNode);
-        ((MockTransportService) mockTransportService).addFailToSendNoConnectRule(internalCluster().getInstance(Discovery.class, toNode).localNode());
-    }
-
-    private void clearNoConnectRule(String fromNode, String toNode) {
-        TransportService mockTransportService = internalCluster().getInstance(TransportService.class, fromNode);
-        ((MockTransportService) mockTransportService).clearRule(internalCluster().getInstance(Discovery.class, toNode).localNode());
-    }
-
-}
diff --git a/src/test/java/org/elasticsearch/discovery/DiscoveryWithServiceDisruptions.java b/src/test/java/org/elasticsearch/discovery/DiscoveryWithServiceDisruptions.java
new file mode 100644
index 00000000000..82abe2eccb1
--- /dev/null
+++ b/src/test/java/org/elasticsearch/discovery/DiscoveryWithServiceDisruptions.java
@@ -0,0 +1,863 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.discovery;
+
+import com.google.common.base.Predicate;
+import org.apache.lucene.util.LuceneTestCase;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
+import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse;
+import org.elasticsearch.action.get.GetResponse;
+import org.elasticsearch.action.index.IndexResponse;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.cluster.ClusterService;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.block.ClusterBlock;
+import org.elasticsearch.cluster.block.ClusterBlockLevel;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.cluster.routing.operation.hash.djb.DjbHashFunction;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.Priority;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.discovery.zen.ZenDiscovery;
+import org.elasticsearch.discovery.zen.elect.ElectMasterService;
+import org.elasticsearch.discovery.zen.fd.FaultDetection;
+import org.elasticsearch.discovery.zen.membership.MembershipAction;
+import org.elasticsearch.discovery.zen.ping.ZenPing;
+import org.elasticsearch.discovery.zen.ping.ZenPingService;
+import org.elasticsearch.discovery.zen.ping.unicast.UnicastZenPing;
+import org.elasticsearch.discovery.zen.publish.PublishClusterStateAction;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.elasticsearch.test.InternalTestCluster;
+import org.elasticsearch.test.disruption.*;
+import org.elasticsearch.test.junit.annotations.TestLogging;
+import org.elasticsearch.test.transport.MockTransportService;
+import org.elasticsearch.transport.*;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+import static org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
+import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.hamcrest.Matchers.*;
+
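+/** Integration tests for discovery under simulated service disruptions such as network
+ *  disconnects and partitions; every test starts its own cluster (see the TEST scope below). */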
+@LuceneTestCase.Slow
+@TestLogging("discovery.zen:TRACE")
+@ClusterScope(scope = Scope.TEST, numDataNodes = 0, transportClientRatio = 0)
+public class DiscoveryWithServiceDisruptions extends ElasticsearchIntegrationTest {
+
+    private static final TimeValue DISRUPTION_HEALING_OVERHEAD = TimeValue.timeValueSeconds(40); // slack added to a disruption's expected time to heal; we use 30s as timeout in many places.
+
+    private ClusterDiscoveryConfiguration discoveryConfig; // reset to null before each test, set lazily by the start*Cluster helpers
+
+
+    @Override
+    protected Settings nodeSettings(int nodeOrdinal) { // per-node settings come from the active discovery configuration
+        return discoveryConfig.node(nodeOrdinal); // discoveryConfig stays null until a start*Cluster helper has run
+    }
+
+    @Before
+    public void clearConfig() { // runs before every test so that each test builds its own discovery configuration
+        discoveryConfig = null;
+    }
+
+    @Override
+    protected int numberOfShards() { // overrides the framework value with the constant 3
+        return 3;
+    }
+
+    @Override
+    protected int numberOfReplicas() { // overrides the framework value with the constant 1
+        return 1;
+    }
+
+    private List<String> startCluster(int numberOfNodes) throws ExecutionException, InterruptedException { // starts a cluster with the default minimum_master_nodes
+        return startCluster(numberOfNodes, -1); // a negative value makes the helpers fall back to numberOfNodes / 2 + 1
+    }
+
+    private List<String> startCluster(int numberOfNodes, int minimumMasterNode) throws ExecutionException, InterruptedException {
+        // randomly choose between the multicast and the unicast (all nodes as hosts) flavour
+        if (randomBoolean()) {
+            return startMulticastCluster(numberOfNodes, minimumMasterNode);
+        }
+        return startUnicastCluster(numberOfNodes, null, minimumMasterNode);
+    }
+
+    final static Settings DEFAULT_SETTINGS = ImmutableSettings.builder() // base node settings shared by the start*Cluster helpers
+            .put(FaultDetection.SETTING_PING_TIMEOUT, "1s") // for hitting simulated network failures quickly
+            .put(FaultDetection.SETTING_PING_RETRIES, "1") // for hitting simulated network failures quickly
+            .put("discovery.zen.join_timeout", "10s")  // still long enough to induce failures, but not so long that the test times out
+            .put(DiscoverySettings.PUBLISH_TIMEOUT, "1s") // <-- for hitting simulated network failures quickly
+            .put("http.enabled", false) // just to make test quicker
+            .put("gateway.local.list_timeout", "10s") // still long enough to induce failures, but not so long that the test times out
+            .put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName()) // mock transport; presumably what the disruption schemes hook into - confirm
+            .build();
+
+    private List<String> startMulticastCluster(int numberOfNodes, int minimumMasterNode) throws ExecutionException, InterruptedException {
+        // a negative value means "not specified": fall back to a strict majority of the nodes
+        final int effectiveMinMasters = minimumMasterNode < 0 ? numberOfNodes / 2 + 1 : minimumMasterNode;
+
+        // TODO: Rarely use default settings for some of these
+        ImmutableSettings.Builder settingsBuilder = ImmutableSettings.builder();
+        settingsBuilder.put(DEFAULT_SETTINGS);
+        settingsBuilder.put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES, effectiveMinMasters);
+        Settings settings = settingsBuilder.build();
+
+        // a configuration installed earlier in the same test is kept as is
+        if (discoveryConfig == null) {
+            discoveryConfig = new ClusterDiscoveryConfiguration(numberOfNodes, settings);
+        }
+        List<String> startedNodes = internalCluster().startNodesAsync(numberOfNodes).get();
+        ensureStableCluster(numberOfNodes);
+        return startedNodes;
+    }
+
+    private List<String> startUnicastCluster(int numberOfNodes, @Nullable int[] unicastHostsOrdinals, int minimumMasterNode) throws ExecutionException, InterruptedException {
+        // a negative value means "not specified": fall back to a strict majority of the nodes
+        final int effectiveMinMasters = minimumMasterNode < 0 ? numberOfNodes / 2 + 1 : minimumMasterNode;
+
+        // TODO: Rarely use default settings for some of these
+        ImmutableSettings.Builder settingsBuilder = ImmutableSettings.builder();
+        settingsBuilder.put(DEFAULT_SETTINGS);
+        settingsBuilder.put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES, effectiveMinMasters);
+        Settings settings = settingsBuilder.build();
+
+        if (discoveryConfig == null && unicastHostsOrdinals == null) {
+            // no explicit host list given: every node is listed as a unicast host
+            discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(numberOfNodes, settings);
+        } else if (discoveryConfig == null) {
+            discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(numberOfNodes, settings, unicastHostsOrdinals);
+        }
+
+        List<String> startedNodes = internalCluster().startNodesAsync(numberOfNodes).get();
+        ensureStableCluster(numberOfNodes);
+
+        // TODO: this is a temporary solution so that nodes will not base their reaction to a partition based on previous successful results
+        for (ZenPingService pingService : internalCluster().getInstances(ZenPingService.class)) {
+            for (ZenPing ping : pingService.zenPings()) {
+                if (!(ping instanceof UnicastZenPing)) {
+                    continue;
+                }
+                ((UnicastZenPing) ping).clearTemporalReponses();
+            }
+        }
+        return startedNodes;
+    }
+
+
+    /**
+     * Tests that no split brain occurs under a partial network partition, i.e. when the elected master and
+     * one other node are disconnected from each other.
+     * See https://github.com/elasticsearch/elasticsearch/issues/2488
+     */
+    @Test
+    public void failWithMinimumMasterNodesConfigured() throws Exception {
+        // no minimum_master_nodes is passed, so the start helpers use a majority: 2 of the 3 nodes
+        List<String> nodes = startCluster(3);
+
+        // Figure out what is the elected master node
+        final String masterNode = internalCluster().getMasterName();
+        logger.info("---> legit elected master node=" + masterNode);
+
+        // Pick a node that isn't the elected master.
+        Set<String> nonMasters = new HashSet<>(nodes);
+        nonMasters.remove(masterNode);
+        final String unluckyNode = randomFrom(nonMasters.toArray(Strings.EMPTY_ARRAY));
+
+
+        // Simulate a network issue between the unlucky node and elected master node in both directions.
+
+        NetworkDisconnectPartition networkDisconnect = new NetworkDisconnectPartition(masterNode, unluckyNode, getRandom());
+        setDisruptionScheme(networkDisconnect);
+        networkDisconnect.startDisrupting();
+
+        // Wait until the elected master has removed the unlucky node...
+        ensureStableCluster(2, masterNode);
+
+        // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
+        // continuously ping until network failures have been resolved. However,
+        // it may take a bit before the node detects it has been cut off from the elected master
+        assertNoMaster(unluckyNode);
+
+        networkDisconnect.stopDisrupting();
+
+        // Wait until the master node sees all 3 nodes again.
+        ensureStableCluster(3);
+
+        // The elected master shouldn't have changed, since the unlucky node never could have elected himself as
+        // master since m_m_n of 2 could never be satisfied.
+        assertMaster(masterNode, nodes);
+    }
+
+    /**
+     * Verifies that the proper block is applied when nodes lose their master.
+     * <p>
+     * A random partition cuts off a minority node, which is expected to report no master with the write block.
+     * After healing, {@link DiscoverySettings#NO_MASTER_BLOCK} is set to "all" and the partition is repeated.
+     */
+    @Test
+    @TestLogging(value = "cluster.service:TRACE,indices.recovery:TRACE")
+    public void testVerifyApiBlocksDuringPartition() throws Exception {
+        startCluster(3);
+
+        // Makes sure that the get request can be executed on each node locally:
+        assertAcked(prepareCreate("test").setSettings(ImmutableSettings.builder()
+                        .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
+                        .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
+        ));
+
+        // Everything is stable now, it is now time to simulate evil...
+        // but first make sure we have no initializing shards and all is green
+        // (waiting for green here, because indexing / search in a yellow index is fine as long as no other nodes go down)
+        ensureGreen("test");
+
+        NetworkPartition networkPartition = addRandomPartition();
+
+        final String isolatedNode = networkPartition.getMinoritySide().get(0);
+        final String nonIsolatedNode = networkPartition.getMajoritySide().get(0);
+
+        // Simulate a network issue between the unlucky node and the rest of the cluster.
+        networkPartition.startDisrupting();
+
+
+        // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
+        // continuously ping until network failures have been resolved. However,
+        // it may take a bit before the node detects it has been cut off from the elected master
+        logger.info("waiting for isolated node [{}] to have no master", isolatedNode);
+        assertNoMaster(isolatedNode, DiscoverySettings.NO_MASTER_BLOCK_WRITES, TimeValue.timeValueSeconds(10));
+
+
+        logger.info("wait until elected master has been removed and a new 2 node cluster was formed (via [{}])", nonIsolatedNode);
+        ensureStableCluster(2, nonIsolatedNode);
+
+        // every node on the majority side must have an elected master and no global blocks
+        for (String node : networkPartition.getMajoritySide()) {
+            ClusterState nodeState = getNodeClusterState(node);
+            boolean hasMaster = nodeState.nodes().getMasterNode() != null;
+            boolean hasGlobalBlocks = !nodeState.blocks().global().isEmpty();
+            if (!hasMaster || hasGlobalBlocks) {
+                fail("node [" + node + "] has no master or has blocks, despite of being on the right side of the partition. State dump:\n"
+                        + nodeState.prettyPrint());
+            }
+        }
+
+
+        networkPartition.stopDisrupting();
+
+        // Wait until the master node sees all 3 nodes again.
+        ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkPartition.expectedTimeToHeal().millis()));
+
+        // second round: switch the no-master block to "all" while the cluster is whole, then partition again
+        logger.info("Verify no master block with {} set to {}", DiscoverySettings.NO_MASTER_BLOCK, "all");
+        client().admin().cluster().prepareUpdateSettings()
+                .setTransientSettings(ImmutableSettings.builder().put(DiscoverySettings.NO_MASTER_BLOCK, "all"))
+                .get();
+
+        networkPartition.startDisrupting();
+
+
+        // The unlucky node must report *no* master node, since it can't connect to master and in fact it should
+        // continuously ping until network failures have been resolved. However,
+        // it may take a bit before the node detects it has been cut off from the elected master
+        logger.info("waiting for isolated node [{}] to have no master", isolatedNode);
+        assertNoMaster(isolatedNode, DiscoverySettings.NO_MASTER_BLOCK_ALL, TimeValue.timeValueSeconds(10));
+
+        // make sure we have stable cluster & cross partition recoveries are canceled by the removal of the missing node
+        // the unresponsive partition causes recoveries to only time out after 15m (default) and these will cause
+        // the test to fail due to unfreed resources
+        ensureStableCluster(2, nonIsolatedNode);
+
+    }
+
+    /**
+     * Cuts the elected master off from the other nodes, waits for the remaining nodes to form a cluster,
+     * heals the partition and then checks that all nodes report the same cluster state.
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testIsolateMasterAndVerifyClusterStateConsensus() throws Exception {
+        final List<String> nodes = startCluster(3);
+
+        assertAcked(prepareCreate("test")
+                .setSettings(ImmutableSettings.builder()
+                                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
+                                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
+                ));
+        ensureGreen();
+
+        // isolate whichever node is currently the elected master
+        final String isolatedNode = internalCluster().getMasterName();
+        final NetworkPartition networkPartition = addRandomIsolation(isolatedNode);
+        networkPartition.startDisrupting();
+
+        // make sure the cluster re-forms, observed through a node on the majority side
+        final String nonIsolatedNode = networkPartition.getMajoritySide().get(0);
+        ensureStableCluster(2, nonIsolatedNode);
+
+        // make sure the isolated node picks up on things.
+        assertNoMaster(isolatedNode, TimeValue.timeValueSeconds(40));
+
+        // heal the partition
+        networkPartition.stopDisrupting();
+
+        final long healTimeoutMillis = DISRUPTION_HEALING_OVERHEAD.millis() + networkPartition.expectedTimeToHeal().millis();
+        ensureStableCluster(3, new TimeValue(healTimeoutMillis));
+
+        logger.info("issue a reroute");
+        // trigger a reroute now, instead of waiting for the background reroute of RerouteService
+        assertAcked(client().admin().cluster().prepareReroute());
+        // and wait for it to finish and for the cluster to stabilize
+        ensureGreen("test");
+
+        // compare each node's cluster state with the one reported by the first node
+        ClusterState referenceState = null;
+        for (String node : nodes) {
+            final ClusterState nodeState = getNodeClusterState(node);
+            if (referenceState == null) {
+                referenceState = nodeState; // first node: nothing to compare against yet
+                continue;
+            }
+            try {
+                assertEquals("unequal versions", referenceState.version(), nodeState.version());
+                assertEquals("unequal node count", referenceState.nodes().size(), nodeState.nodes().size());
+                assertEquals("different masters ", referenceState.nodes().masterNodeId(), nodeState.nodes().masterNodeId());
+                assertEquals("different meta data version", referenceState.metaData().version(), nodeState.metaData().version());
+                if (!referenceState.routingTable().prettyPrint().equals(nodeState.routingTable().prettyPrint())) {
+                    fail("different routing");
+                }
+            } catch (AssertionError t) {
+                final StringBuilder report = new StringBuilder("failed comparing cluster state: ").append(t.getMessage()).append("\n");
+                report.append("--- cluster state of node [").append(nodes.get(0)).append("]: ---\n").append(referenceState.prettyPrint());
+                report.append("\n--- cluster state [").append(node).append("]: ---\n").append(nodeState.prettyPrint());
+                fail(report.toString());
+            }
+        }
+    }
+
+    /**
+     * Test that we do not lose documents whose indexing request was successful, under a randomly selected disruption scheme.
+     * We also collect & report the type of indexing failures that occur.
+     */
+    @Test
+    @LuceneTestCase.AwaitsFix(bugUrl = "needs some more work to stabilize")
+    @TestLogging("action.index:TRACE,action.get:TRACE,discovery:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testAckedIndexing() throws Exception {
+        final List<String> nodes = startCluster(3);
+
+        assertAcked(prepareCreate("test")
+                .setSettings(ImmutableSettings.builder()
+                                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
+                                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
+                ));
+        ensureGreen();
+
+        ServiceDisruptionScheme disruptionScheme = addRandomDisruptionScheme();
+        logger.info("disruption scheme [{}] added", disruptionScheme);
+
+        final ConcurrentHashMap<String, String> ackedDocs = new ConcurrentHashMap<>(); // id -> node sent.
+
+        final AtomicBoolean stop = new AtomicBoolean(false);
+        List<Thread> indexers = new ArrayList<>(nodes.size());
+        List<Semaphore> semaphores = new ArrayList<>(nodes.size()); // one per indexer; each permit lets it attempt one doc
+        final AtomicInteger idGenerator = new AtomicInteger(0);
+        final AtomicReference<CountDownLatch> countDownLatchRef = new AtomicReference<>(); // latch of the batch in flight
+        final List<Exception> expectedExceptions = Collections.synchronizedList(new ArrayList<Exception>());
+
+        logger.info("starting indexers");
+        try {
+            for (final String node : nodes) {
+                final Semaphore semaphore = new Semaphore(0);
+                semaphores.add(semaphore);
+                final Client client = client(node);
+                final String name = "indexer_" + indexers.size();
+                final int numPrimaries = getNumShards("test").numPrimaries;
+                Thread thread = new Thread(new Runnable() {
+                    @Override
+                    public void run() {
+                        while (!stop.get()) {
+                            String id = null;
+                            try {
+                                if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
+                                    continue; // no permit yet: loop around to re-check the stop flag
+                                }
+                                logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
+                                try {
+                                    id = Integer.toString(idGenerator.incrementAndGet());
+                                    int shard = ((InternalTestCluster) cluster()).getInstance(DjbHashFunction.class).hash(id) % numPrimaries;
+                                    logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
+                                    IndexResponse response = client.prepareIndex("test", "type", id).setSource("{}").setTimeout("1s").get();
+                                    assertThat(response.getVersion(), equalTo(1L));
+                                    ackedDocs.put(id, node);
+                                    logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
+                                } catch (ElasticsearchException e) {
+                                    expectedExceptions.add(e);
+                                    logger.trace("[{}] failed id [{}] through node [{}]", e, name, id, node);
+                                } finally {
+                                    countDownLatchRef.get().countDown(); // failed attempts count too, so the main thread is not left waiting
+                                    logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
+                                }
+                            } catch (InterruptedException e) {
+                                // fine - semaphore interrupt
+                            } catch (Throwable t) {
+                                logger.info("unexpected exception in background thread of [{}]", t, node);
+                            }
+                        }
+                    }
+                });
+
+                thread.setName(name);
+                thread.setDaemon(true);
+                thread.start();
+                indexers.add(thread);
+            }
+
+            int docsPerIndexer = randomInt(3);
+            logger.info("indexing " + docsPerIndexer + " docs per indexer before partition");
+            countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
+            for (Semaphore semaphore : semaphores) {
+                semaphore.release(docsPerIndexer);
+            }
+            assertTrue(countDownLatchRef.get().await(1, TimeUnit.MINUTES));
+
+            for (int iter = 1 + randomInt(2); iter > 0; iter--) {
+                logger.info("starting disruptions & indexing (iteration [{}])", iter);
+                disruptionScheme.startDisrupting();
+
+                docsPerIndexer = 1 + randomInt(5);
+                logger.info("indexing " + docsPerIndexer + " docs per indexer during partition");
+                countDownLatchRef.set(new CountDownLatch(docsPerIndexer * indexers.size()));
+                Collections.shuffle(semaphores, getRandom()); // seeded random keeps the release order reproducible
+                for (Semaphore semaphore : semaphores) {
+                    assertThat(semaphore.availablePermits(), equalTo(0));
+                    semaphore.release(docsPerIndexer);
+                }
+                assertTrue(countDownLatchRef.get().await(60000 + disruptionScheme.expectedTimeToHeal().millis() * (docsPerIndexer * indexers.size()), TimeUnit.MILLISECONDS));
+
+                logger.info("stopping disruption");
+                disruptionScheme.stopDisrupting();
+                ensureStableCluster(3, TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()));
+                ensureGreen("test");
+
+                logger.info("validating successful docs");
+                for (String node : nodes) {
+                    try {
+                        logger.debug("validating through node [{}]", node);
+                        for (String id : ackedDocs.keySet()) {
+                            assertTrue("doc [" + id + "] indexed via node [" + ackedDocs.get(id) + "] not found",
+                                    client(node).prepareGet("test", "type", id).setPreference("_local").get().isExists());
+                        }
+                    } catch (AssertionError e) {
+                        throw new AssertionError(e.getMessage() + " (checked via node [" + node + "])", e);
+                    }
+                }
+
+                logger.info("done validating (iteration [{}])", iter);
+            }
+        } finally {
+            if (expectedExceptions.size() > 0) {
+                StringBuilder sb = new StringBuilder("Indexing exceptions during disruption:");
+                for (Exception e : expectedExceptions) {
+                    sb.append("\n").append(e.getMessage());
+                }
+                logger.debug(sb.toString());
+            }
+            logger.info("shutting down indexers");
+            stop.set(true);
+            for (Thread indexer : indexers) {
+                indexer.interrupt();
+                indexer.join(60000);
+            }
+        }
+    }
+
+    /**
+     * Verifies that the cluster recovers when a long GC pause on the master makes the other nodes elect a new one.
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testMasterNodeGCs() throws Exception {
+        // TODO: on mac OS multicast threads are shared between nodes, so we can't simulate GC and stop pinging for just one node.
+        // Find a way to block thread creation in the generic thread pool to avoid this.
+        List<String> clusterNodes = startUnicastCluster(3, null, -1);
+
+        String pausedMaster = internalCluster().getMasterName();
+        // a very long GC, but it's OK as we remove the disruption when it has had an effect
+        SingleNodeDisruption gcPause = new LongGCDisruption(pausedMaster, getRandom(), 100, 200, 30000, 60000);
+        internalCluster().setDisruptionScheme(gcPause);
+        gcPause.startDisrupting();
+
+        Set<String> remainingSet = new HashSet<>(clusterNodes);
+        remainingSet.remove(pausedMaster);
+        List<String> remaining = new ArrayList<>(remainingSet);
+
+        logger.info("waiting for nodes to de-elect master [{}]", pausedMaster);
+        for (String remainingNode : remainingSet) {
+            assertDifferentMaster(remainingNode, pausedMaster);
+        }
+
+        logger.info("waiting for nodes to elect a new master");
+        ensureStableCluster(2, remaining.get(0));
+
+        logger.info("waiting for any pinging to stop");
+        for (final String remainingNode : remaining) {
+            boolean stoppedJoining = awaitBusy(new Predicate<Object>() {
+                @Override
+                public boolean apply(Object ignored) {
+                    return !((ZenDiscovery) internalCluster().getInstance(Discovery.class, remainingNode)).joiningCluster();
+                }
+            }, 30, TimeUnit.SECONDS);
+            assertTrue("node [" + remainingNode + "] is still joining master", stoppedJoining);
+        }
+
+        // end the simulated GC pause and give the old master time to rejoin
+        gcPause.stopDisrupting();
+        TimeValue healTimeout = new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + gcPause.expectedTimeToHeal().millis());
+        ensureStableCluster(3, healTimeout, remaining.get(0));
+
+        // all nodes must agree on a master, and it must not be the one that was paused
+        String electedMaster = internalCluster().getMasterName();
+        assertThat(electedMaster, not(equalTo(pausedMaster)));
+        assertMaster(electedMaster, clusterNodes);
+    }
+
+    /**
+     * Verifies that a document indexed on the majority side of a partition can be read locally from every node,
+     * including the one that was on the minority side, once the partition is healed.
+     *
+     * @throws Exception on any unexpected failure while driving the test cluster
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
+    public void testRejoinDocumentExistsInAllShardCopies() throws Exception {
+        final List<String> startedNodes = startCluster(3);
+
+        assertAcked(prepareCreate("test")
+                .setSettings(ImmutableSettings.builder()
+                                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
+                                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
+                )
+                .get());
+        ensureGreen("test");
+
+        final List<String> shuffledNodes = new ArrayList<>(startedNodes);
+        Collections.shuffle(shuffledNodes, getRandom());
+        final String cutOffNode = shuffledNodes.get(0);
+        final String connectedNode = shuffledNodes.get(1);
+
+        final ServiceDisruptionScheme isolation = addRandomIsolation(cutOffNode);
+        isolation.startDisrupting();
+        ensureStableCluster(2, connectedNode);
+        assertFalse(client(connectedNode).admin().cluster().prepareHealth("test").setWaitForYellowStatus().get().isTimedOut());
+
+        IndexResponse indexed = internalCluster().client(connectedNode).prepareIndex("test", "type").setSource("field", "value").get();
+        assertThat(indexed.getVersion(), equalTo(1l));
+        final String docId = indexed.getId();
+
+        logger.info("Verifying if document exists via node[" + connectedNode + "]");
+        GetResponse fetched = internalCluster().client(connectedNode).prepareGet("test", "type", docId)
+                .setPreference("_local")
+                .get();
+        assertThat(fetched.isExists(), is(true));
+        assertThat(fetched.getVersion(), equalTo(1l));
+        assertThat(fetched.getId(), equalTo(docId));
+
+        isolation.stopDisrupting();
+
+        ensureStableCluster(3);
+        ensureGreen("test");
+
+        for (String readNode : shuffledNodes) {
+            logger.info("Verifying if document exists after isolating node[" + cutOffNode + "] via node[" + readNode + "]");
+            fetched = internalCluster().client(readNode).prepareGet("test", "type", docId)
+                    .setPreference("_local")
+                    .get();
+            assertThat(fetched.isExists(), is(true));
+            assertThat(fetched.getVersion(), equalTo(1l));
+            assertThat(fetched.getId(), equalTo(docId));
+        }
+    }
+
+    /**
+     * A 4 node cluster with m_m_n set to 3 where each node has a single unicast endpoint. One node gets partitioned from the
+     * master node while the temporal unicast responses are empty. Once the partition heals, the single ping response contains
+     * a master node, and the rejoining node should pick up this master and connect to it.
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE")
+    public void unicastSinglePingResponseContainsMaster() throws Exception {
+        List<String> allNodes = startUnicastCluster(4, new int[]{0}, -1);
+        // Find out which node is the elected master
+        final String electedMaster = internalCluster().getMasterName();
+        logger.info("---> legit elected master node=" + electedMaster);
+        List<String> isolationCandidates = new ArrayList<>(allNodes);
+        isolationCandidates.remove(electedMaster);
+        isolationCandidates.remove(allNodes.get(0)); // <-- Don't isolate the node that is the unicast endpoint for all the other nodes.
+        final String isolatedNode = isolationCandidates.get(0);
+
+        // Forcefully clear the temporal response lists on all nodes. Otherwise the node in the unicast host list
+        // includes all the other nodes that have pinged it and the issue doesn't manifest
+        for (ZenPingService service : internalCluster().getInstances(ZenPingService.class)) {
+            for (ZenPing ping : service.zenPings()) {
+                ((UnicastZenPing) ping).clearTemporalReponses();
+            }
+        }
+
+        // Simulate a network issue between the unlucky node and the elected master node in both directions.
+        NetworkDisconnectPartition masterLinkCut = new NetworkDisconnectPartition(electedMaster, isolatedNode, getRandom());
+        setDisruptionScheme(masterLinkCut);
+        masterLinkCut.startDisrupting();
+        // Wait until the elected master has removed the unlucky node...
+        ensureStableCluster(3, electedMaster);
+
+        // The isolated node must report no master, so it starts pinging
+        assertNoMaster(isolatedNode);
+        masterLinkCut.stopDisrupting();
+        // Wait until the master node sees all 4 nodes again.
+        ensureStableCluster(4);
+        // The elected master shouldn't have changed, since the isolated node could never have elected itself as
+        // master: an m_m_n of 3 could never be satisfied.
+        assertMaster(electedMaster, allNodes);
+    }
+
+    /**
+     * Cuts the first node (presumably the unicast host of the others) off from the rest and checks the cluster reforms after healing.
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE")
+    public void isolatedUnicastNodes() throws Exception {
+        List<String> allNodes = startUnicastCluster(3, new int[]{0}, -1);
+        // The first node is the one that gets cut off from everybody else
+        final String unicastTarget = allNodes.get(0);
+
+        Set<String> targetSide = new HashSet<>(Collections.singleton(unicastTarget));
+        Set<String> othersSide = new HashSet<>(allNodes);
+        othersSide.remove(unicastTarget);
+
+        // Forcefully clear the temporal response lists on all nodes. Otherwise the node in the unicast host list
+        // includes all the other nodes that have pinged it and the issue doesn't manifest
+        for (ZenPingService service : internalCluster().getInstances(ZenPingService.class)) {
+            for (ZenPing ping : service.zenPings()) {
+                ((UnicastZenPing) ping).clearTemporalReponses();
+            }
+        }
+
+        // Simulate a network issue between the unicast target node and the rest of the cluster
+        NetworkDisconnectPartition targetCutOff = new NetworkDisconnectPartition(targetSide, othersSide, getRandom());
+        setDisruptionScheme(targetCutOff);
+        targetCutOff.startDisrupting();
+        // Wait until the rest of the cluster has settled on 2 nodes...
+        ensureStableCluster(2, allNodes.get(1));
+
+        // The isolated unicast target must report no master, so it starts pinging
+        assertNoMaster(unicastTarget);
+        targetCutOff.stopDisrupting();
+        // Wait until the cluster consists of all 3 nodes again.
+        ensureStableCluster(3);
+    }
+
+
+    /**
+     * Test cluster join with issues in cluster state publishing
+     */
+    @Test
+    @TestLogging("discovery.zen:TRACE,action:TRACE")
+    public void testClusterJoinDespiteOfPublishingIssues() throws Exception {
+        List<String> startedNodes = startCluster(2, 1);
+
+        String master = internalCluster().getMasterName();
+        String nonMaster = master.equals(startedNodes.get(0)) ? startedNodes.get(1) : startedNodes.get(0);
+
+        // node view taken from the non-master, so its local node is presumably the non-master itself
+        DiscoveryNodes nonMasterView = internalCluster().getInstance(ClusterService.class, nonMaster).state().nodes();
+        DiscoveryNode masterDiscoNode = nonMasterView.masterNode();
+        DiscoveryNode nonMasterDiscoNode = nonMasterView.localNode();
+
+        logger.info("blocking requests from non master [{}] to master [{}]", nonMaster, master);
+        MockTransportService nonMasterTransport = (MockTransportService) internalCluster().getInstance(TransportService.class, nonMaster);
+        nonMasterTransport.addFailToSendNoConnectRule(masterDiscoNode);
+
+        assertNoMaster(nonMaster);
+
+        logger.info("blocking cluster state publishing from master [{}] to non master [{}]", master, nonMaster);
+        MockTransportService masterTransport = (MockTransportService) internalCluster().getInstance(TransportService.class, master);
+        masterTransport.addFailToSendNoConnectRule(nonMasterDiscoNode, PublishClusterStateAction.ACTION_NAME);
+
+        logger.info("allowing requests from non master [{}] to master [{}], waiting for two join request", nonMaster, master);
+        final CountDownLatch joinRequests = new CountDownLatch(2);
+        nonMasterTransport.addDelegate(masterDiscoNode, new MockTransportService.DelegateTransport(nonMasterTransport.original()) {
+            @Override
+            public void sendRequest(DiscoveryNode node, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException, TransportException {
+                if (action.equals(MembershipAction.DISCOVERY_JOIN_ACTION_NAME)) {
+                    joinRequests.countDown();
+                }
+                super.sendRequest(node, requestId, action, request, options);
+            }
+        });
+
+        joinRequests.await(); // NOTE(review): no timeout - this blocks forever if fewer than two join requests are sent
+
+        logger.info("waiting for cluster to reform");
+        masterTransport.clearRule(nonMasterDiscoNode);
+        nonMasterTransport.clearRule(masterDiscoNode);
+
+        ensureStableCluster(2);
+    }
+
+
+    /**
+     * Creates a randomly chosen network partition (unresponsive or disconnected), registers it
+     * via setDisruptionScheme and returns it.
+     */
+    protected NetworkPartition addRandomPartition() {
+        final boolean unresponsive = randomBoolean();
+        final NetworkPartition chosen = unresponsive
+                ? new NetworkUnresponsivePartition(getRandom())
+                : new NetworkDisconnectPartition(getRandom());
+        setDisruptionScheme(chosen);
+        return chosen;
+    }
+
+    /**
+     * Registers a randomly typed partition that separates {@code isolatedNode} from all other nodes of the internal cluster.
+     */
+    protected NetworkPartition addRandomIsolation(String isolatedNode) {
+        // one side holds only the isolated node, the other side every remaining node name
+        Set<String> lonelySide = new HashSet<>();
+        Set<String> crowdedSide = new HashSet<>(Arrays.asList(internalCluster().getNodeNames()));
+        lonelySide.add(isolatedNode);
+        crowdedSide.remove(isolatedNode);
+
+        final NetworkPartition chosen = randomBoolean()
+                ? new NetworkUnresponsivePartition(lonelySide, crowdedSide, getRandom())
+                : new NetworkDisconnectPartition(lonelySide, crowdedSide, getRandom());
+
+        internalCluster().setDisruptionScheme(chosen);
+        return chosen;
+    }
+
+    /** Picks one of the four disruption schemes below at random, registers it via setDisruptionScheme and returns it. */
+    private ServiceDisruptionScheme addRandomDisruptionScheme() {
+        List<ServiceDisruptionScheme> list = Arrays.asList(
+                new NetworkUnresponsivePartition(getRandom()),
+                new NetworkDelaysPartition(getRandom()),
+                new NetworkDisconnectPartition(getRandom()),
+                new SlowClusterStateProcessing(getRandom()));
+        Collections.shuffle(list, getRandom()); // use the test's random so the pick is reproducible from the test seed
+        setDisruptionScheme(list.get(0));
+        return list.get(0);
+    }
+
+    private void ensureStableCluster(int nodeCount) { // 30 second timeout, checked through a randomly chosen node
+        ensureStableCluster(nodeCount, TimeValue.timeValueSeconds(30), null);
+    }
+
+    private void ensureStableCluster(int nodeCount, TimeValue timeValue) { // caller's timeout, randomly chosen node
+        ensureStableCluster(nodeCount, timeValue, null);
+    }
+
+    private void ensureStableCluster(int nodeCount, @Nullable String viaNode) { // 30 second timeout, checked through viaNode
+        ensureStableCluster(nodeCount, TimeValue.timeValueSeconds(30), viaNode);
+    }
+
+    private void ensureStableCluster(int nodeCount, TimeValue timeValue, @Nullable String viaNode) {
+        // Asks the health API, through viaNode, to wait for nodeCount nodes and no relocating shards;
+        // fails the test with the last cluster state on timeout. A null viaNode means "pick a random node".
+        final String accessNode = viaNode != null ? viaNode : randomFrom(internalCluster().getNodeNames());
+        logger.debug("ensuring cluster is stable with [{}] nodes. access node: [{}]. timeout: [{}]", nodeCount, accessNode, timeValue);
+        ClusterHealthResponse health = client(accessNode).admin().cluster().prepareHealth()
+                .setWaitForEvents(Priority.LANGUID)
+                .setWaitForNodes(Integer.toString(nodeCount))
+                .setTimeout(timeValue)
+                .setWaitForRelocatingShards(0)
+                .get();
+        if (health.isTimedOut()) {
+            ClusterStateResponse lastState = client(accessNode).admin().cluster().prepareState().get();
+            fail("failed to reach a stable cluster of [" + nodeCount + "] nodes. Tried via [" + accessNode + "]. last cluster state:\n"
+                    + lastState.getState().prettyPrint());
+        }
+        assertThat(health.isTimedOut(), is(false));
+    }
+
+    private ClusterState getNodeClusterState(String node) { // cluster state requested from the given node with setLocal(true)
+        return client(node).admin().cluster().prepareState().setLocal(true).get().getState();
+    }
+
+    private void assertNoMaster(final String node) throws Exception { // no block check, waits at most 10 seconds
+        assertNoMaster(node, null, TimeValue.timeValueSeconds(10));
+    }
+
+    private void assertNoMaster(final String node, TimeValue maxWaitTime) throws Exception { // no block check, caller's wait time
+        assertNoMaster(node, null, maxWaitTime);
+    }
+
+    private void assertNoMaster(final String node, @Nullable final ClusterBlock expectedBlocks, TimeValue maxWaitTime) throws Exception {
+        assertBusy(new Runnable() { // presumably re-run until the assertions pass or maxWaitTime elapses
+            @Override
+            public void run() {
+                ClusterState state = getNodeClusterState(node);
+                assertNull("node [" + node + "] still has [" + state.nodes().masterNode() + "] as master", state.nodes().masterNode());
+                if (expectedBlocks != null) { // optionally require every level of the expected block as a global block
+                    for (ClusterBlockLevel level : expectedBlocks.levels()) {
+                        assertTrue("node [" + node + "] does not have level [" + level + "] in its blocks", state.getBlocks().hasGlobalBlock(level));
+                    }
+                }
+            }
+        }, maxWaitTime.getMillis(), TimeUnit.MILLISECONDS);
+    }
+
+    private void assertDifferentMaster(final String node, final String oldMasterNode) throws Exception {
+        // Checks, within a 10 second assertBusy window, that the master name in the node's local
+        // cluster state differs from oldMasterNode.
+        assertBusy(new Runnable() {
+            @Override
+            public void run() {
+                ClusterState localState = getNodeClusterState(node);
+                // a node that currently has no master at all also counts as having moved on
+                String currentMaster = localState.nodes().masterNode() == null ? null : localState.nodes().masterNode().name();
+                logger.trace("[{}] master is [{}]", node, localState.nodes().masterNode());
+                assertThat("node [" + node + "] still has [" + currentMaster + "] as master",
+                        oldMasterNode, not(equalTo(currentMaster)));
+            }
+        }, 10, TimeUnit.SECONDS);
+    }
+
+    private void assertMaster(String masterNode, List<String> nodes) { // every node must see all nodes and the given master
+        for (String viewer : nodes) {
+            final ClusterState viewerState = getNodeClusterState(viewer);
+            final String stateDump = "cluster_state:\n" + viewerState.prettyPrint();
+            assertThat("wrong node count on [" + viewer + "]. " + stateDump, viewerState.nodes().size(), equalTo(nodes.size()));
+            assertThat("wrong master on node [" + viewer + "]. " + stateDump, viewerState.nodes().masterNode().name(), equalTo(masterNode));
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/discovery/ZenFaultDetectionTests.java b/src/test/java/org/elasticsearch/discovery/ZenFaultDetectionTests.java
new file mode 100644
index 00000000000..082148921e6
--- /dev/null
+++ b/src/test/java/org/elasticsearch/discovery/ZenFaultDetectionTests.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.discovery;
+
+import com.google.common.collect.ImmutableMap;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.ClusterName;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.cluster.node.DiscoveryNodes;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.discovery.zen.DiscoveryNodesProvider;
+import org.elasticsearch.discovery.zen.fd.FaultDetection;
+import org.elasticsearch.discovery.zen.fd.MasterFaultDetection;
+import org.elasticsearch.discovery.zen.fd.NodesFaultDetection;
+import org.elasticsearch.node.service.NodeService;
+import org.elasticsearch.test.ElasticsearchTestCase;
+import org.elasticsearch.test.transport.MockTransportService;
+import org.elasticsearch.threadpool.ThreadPool;
+import org.elasticsearch.transport.TransportConnectionListener;
+import org.elasticsearch.transport.local.LocalTransport;
+import org.hamcrest.Matcher;
+import org.hamcrest.Matchers;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class ZenFaultDetectionTests extends ElasticsearchTestCase {
+    // Checks that nodes/master fault detection on service A reports node B as failed once B's transport service is stopped.
+    protected ThreadPool threadPool;
+
+    protected static final Version version0 = Version.fromId(/*0*/99);
+    protected DiscoveryNode nodeA;
+    protected MockTransportService serviceA;
+
+    protected static final Version version1 = Version.fromId(199);
+    protected DiscoveryNode nodeB;
+    protected MockTransportService serviceB;
+
+    @Before
+    public void setUp() throws Exception {
+        super.setUp();
+        threadPool = new ThreadPool(getClass().getName());
+        serviceA = build(ImmutableSettings.builder().put("name", "TS_A").build(), version0);
+        nodeA = new DiscoveryNode("TS_A", "TS_A", serviceA.boundAddress().publishAddress(), ImmutableMap.<String, String>of(), version0);
+        serviceB = build(ImmutableSettings.builder().put("name", "TS_B").build(), version1);
+        nodeB = new DiscoveryNode("TS_B", "TS_B", serviceB.boundAddress().publishAddress(), ImmutableMap.<String, String>of(), version1);
+
+        // wait till all nodes are properly connected and the event has been sent, so tests in this class
+        // will not get this callback called on the connections done in this setup
+        final CountDownLatch latch = new CountDownLatch(4); // one count per connectToNode call below
+        TransportConnectionListener waitForConnection = new TransportConnectionListener() {
+            @Override
+            public void onNodeConnected(DiscoveryNode node) {
+                latch.countDown();
+            }
+
+            @Override
+            public void onNodeDisconnected(DiscoveryNode node) {
+                fail("disconnect should not be called " + node);
+            }
+        };
+        serviceA.addConnectionListener(waitForConnection);
+        serviceB.addConnectionListener(waitForConnection);
+
+        serviceA.connectToNode(nodeB);
+        serviceA.connectToNode(nodeA);
+        serviceB.connectToNode(nodeA);
+        serviceB.connectToNode(nodeB);
+
+        assertThat("failed to wait for all nodes to connect", latch.await(5, TimeUnit.SECONDS), equalTo(true));
+        serviceA.removeConnectionListener(waitForConnection);
+        serviceB.removeConnectionListener(waitForConnection);
+    }
+
+    @After
+    public void tearDown() throws Exception {
+        super.tearDown();
+        serviceA.close();
+        serviceB.close();
+        threadPool.shutdown();
+    }
+
+    protected MockTransportService build(Settings settings, Version version) { // returns an already started service on a LocalTransport
+        MockTransportService transportService = new MockTransportService(ImmutableSettings.EMPTY, new LocalTransport(settings, threadPool, version), threadPool);
+        transportService.start();
+        return transportService;
+    }
+
+    private DiscoveryNodes buildNodesForA(boolean master) { // nodes A and B with A as local node; master is A if requested, else B
+        DiscoveryNodes.Builder builder = DiscoveryNodes.builder();
+        builder.put(nodeA);
+        builder.put(nodeB);
+        builder.localNodeId(nodeA.id());
+        builder.masterNodeId(master ? nodeA.id() : nodeB.id());
+        return builder.build();
+    }
+
+    private DiscoveryNodes buildNodesForB(boolean master) { // nodes A and B with B as local node; master is B if requested, else A
+        DiscoveryNodes.Builder builder = DiscoveryNodes.builder();
+        builder.put(nodeA);
+        builder.put(nodeB);
+        builder.localNodeId(nodeB.id());
+        builder.masterNodeId(master ? nodeB.id() : nodeA.id());
+        return builder.build();
+    }
+
+    @Test
+    public void testNodesFaultDetectionConnectOnDisconnect() throws InterruptedException {
+        ImmutableSettings.Builder settings = ImmutableSettings.builder();
+        boolean shouldRetry = randomBoolean();
+        // make sure we don't ping: with a 5m interval only the disconnect event can report the failure
+        settings.put(FaultDetection.SETTING_CONNECT_ON_NETWORK_DISCONNECT, shouldRetry)
+                .put(FaultDetection.SETTING_PING_INTERVAL, "5m");
+        NodesFaultDetection nodesFD = new NodesFaultDetection(settings.build(), threadPool, serviceA, new ClusterName("test"));
+        nodesFD.start();
+        nodesFD.updateNodes(buildNodesForA(true), -1);
+        final String[] failureReason = new String[1];
+        final DiscoveryNode[] failureNode = new DiscoveryNode[1];
+        final CountDownLatch notified = new CountDownLatch(1);
+        nodesFD.addListener(new NodesFaultDetection.Listener() {
+            @Override
+            public void onNodeFailure(DiscoveryNode node, String reason) {
+                failureNode[0] = node;
+                failureReason[0] = reason;
+                notified.countDown();
+            }
+        });
+        // will raise a disconnect on A
+        serviceB.stop();
+        assertThat("timed out waiting for the node failure notification", notified.await(30, TimeUnit.SECONDS), equalTo(true));
+
+        assertEquals(nodeB, failureNode[0]);
+        Matcher<String> matcher = Matchers.containsString("verified"); // expected in the reason only when a reconnect was attempted
+        if (!shouldRetry) {
+            matcher = Matchers.not(matcher);
+        }
+
+        assertThat(failureReason[0], matcher);
+    }
+
+    @Test
+    public void testMasterFaultDetectionConnectOnDisconnect() throws InterruptedException {
+        // same scenario as the nodes fault detection test, but A watches B as its master
+        ImmutableSettings.Builder settings = ImmutableSettings.builder();
+        boolean shouldRetry = randomBoolean();
+        // make sure we don't ping: with a 5m interval only the disconnect event can report the failure
+        settings.put(FaultDetection.SETTING_CONNECT_ON_NETWORK_DISCONNECT, shouldRetry)
+                .put(FaultDetection.SETTING_PING_INTERVAL, "5m");
+        ClusterName clusterName = new ClusterName(randomAsciiOfLengthBetween(3, 20));
+        final DiscoveryNodes nodes = buildNodesForA(false);
+        MasterFaultDetection masterFD = new MasterFaultDetection(settings.build(), threadPool, serviceA,
+                new DiscoveryNodesProvider() {
+                    @Override
+                    public DiscoveryNodes nodes() {
+                        return nodes;
+                    }
+
+                    @Override
+                    public NodeService nodeService() {
+                        return null;
+                    }
+                },
+                clusterName
+        );
+        masterFD.start(nodeB, "test");
+
+        final String[] failureReason = new String[1];
+        final DiscoveryNode[] failureNode = new DiscoveryNode[1];
+        final CountDownLatch notified = new CountDownLatch(1);
+        masterFD.addListener(new MasterFaultDetection.Listener() {
+
+            @Override
+            public void onMasterFailure(DiscoveryNode masterNode, String reason) {
+                failureNode[0] = masterNode;
+                failureReason[0] = reason;
+                notified.countDown();
+            }
+
+            @Override
+            public void onDisconnectedFromMaster() {
+                // intentionally empty: this test only observes onMasterFailure
+            }
+        });
+        // will raise a disconnect on A
+        serviceB.stop();
+        assertThat("timed out waiting for the master failure notification", notified.await(30, TimeUnit.SECONDS), equalTo(true));
+
+        assertEquals(nodeB, failureNode[0]);
+        Matcher<String> matcher = Matchers.containsString("verified"); // expected in the reason only when a reconnect was attempted
+        if (!shouldRetry) {
+            matcher = Matchers.not(matcher);
+        }
+
+        assertThat(failureReason[0], matcher);
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java b/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java
index 984b24b3782..c36834d7cf9 100644
--- a/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java
+++ b/src/test/java/org/elasticsearch/discovery/ZenUnicastDiscoveryTests.java
@@ -26,7 +26,6 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
 import org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
-import org.elasticsearch.transport.local.LocalTransport;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -38,47 +37,24 @@ import static org.hamcrest.Matchers.equalTo;
 @ClusterScope(scope = Scope.TEST, numDataNodes = 0)
 public class ZenUnicastDiscoveryTests extends ElasticsearchIntegrationTest {
 
-    private static int currentNumNodes = -1;
-
-    static int currentBaseHttpPort = -1;
-    static int currentNumOfUnicastHosts = -1;
-
-    @Before
-    public void setUP() throws Exception {
-        ElasticsearchIntegrationTest.beforeClass();
-        currentNumNodes = randomIntBetween(3, 5);
-        currentNumOfUnicastHosts = randomIntBetween(1, currentNumNodes);
-        currentBaseHttpPort = 25000 + randomInt(100);
-    }
+    private ClusterDiscoveryConfiguration discoveryConfig;
 
     @Override
     protected Settings nodeSettings(int nodeOrdinal) {
-        ImmutableSettings.Builder builder = ImmutableSettings.settingsBuilder()
-                .put(super.nodeSettings(nodeOrdinal))
-                .put("discovery.type", "zen")
-                .put("discovery.zen.ping.multicast.enabled", false)
-                .put("http.enabled", false); // just to make test quicker
+        return discoveryConfig.node(nodeOrdinal);
+    }
 
-
-        String[] unicastHosts = new String[currentNumOfUnicastHosts];
-        if (internalCluster().getDefaultSettings().get("node.mode").equals("local")) {
-            builder.put(LocalTransport.TRANSPORT_LOCAL_ADDRESS, "unicast_test_" + nodeOrdinal);
-            for (int i = 0; i < unicastHosts.length; i++) {
-                unicastHosts[i] = "unicast_test_" + i;
-            }
-        } else {
-            // we need to pin the node ports so we'd know where to point things
-            builder.put("transport.tcp.port", currentBaseHttpPort + nodeOrdinal);
-            for (int i = 0; i < unicastHosts.length; i++) {
-                unicastHosts[i] = "localhost:" + (currentBaseHttpPort + i);
-            }
-        }
-        builder.putArray("discovery.zen.ping.unicast.hosts", unicastHosts);
-        return builder.build();
+    @Before
+    public void clearConfig() {
+        discoveryConfig = null;
     }
 
     @Test
     public void testNormalClusterForming() throws ExecutionException, InterruptedException {
+        int currentNumNodes = randomIntBetween(3, 5);
+        int currentNumOfUnicastHosts = randomIntBetween(1, currentNumNodes);
+        discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(currentNumNodes, currentNumOfUnicastHosts);
+
         internalCluster().startNodesAsync(currentNumNodes).get();
 
         if (client().admin().cluster().prepareHealth().setWaitForNodes("" + currentNumNodes).get().isTimedOut()) {
@@ -92,9 +68,12 @@ public class ZenUnicastDiscoveryTests extends ElasticsearchIntegrationTest {
     // test fails, because 2 nodes elect themselves as master and the health request times out b/c waiting_for_nodes=N
     // can't be satisfied.
     public void testMinimumMasterNodes() throws Exception {
+        int currentNumNodes = randomIntBetween(3, 5);
+        int currentNumOfUnicastHosts = randomIntBetween(1, currentNumNodes);
         final Settings settings = ImmutableSettings.settingsBuilder().put("discovery.zen.minimum_master_nodes", currentNumNodes / 2 + 1).build();
+        discoveryConfig = new ClusterDiscoveryConfiguration.UnicastZen(currentNumNodes, currentNumOfUnicastHosts, settings);
 
-        List<String> nodes = internalCluster().startNodesAsync(currentNumNodes, settings).get();
+        List<String> nodes = internalCluster().startNodesAsync(currentNumNodes).get();
 
         ensureGreen();
 
diff --git a/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTest.java b/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTest.java
new file mode 100644
index 00000000000..df8f67c536f
--- /dev/null
+++ b/src/test/java/org/elasticsearch/discovery/zen/ElectMasterServiceTest.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.discovery.zen;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.transport.DummyTransportAddress;
+import org.elasticsearch.discovery.zen.elect.ElectMasterService;
+import org.elasticsearch.test.ElasticsearchTestCase;
+import org.junit.Test;
+
+import java.util.*;
+
+/** Unit tests for the master-likelihood ordering and master election of {@link ElectMasterService}. */
+public class ElectMasterServiceTest extends ElasticsearchTestCase {
+    ElectMasterService electMasterService() {
+        return new ElectMasterService(ImmutableSettings.EMPTY);
+    }
+
+    /** Returns a shuffled list of nodes, each randomly given the attribute master=true or master=false. */
+    List<DiscoveryNode> generateRandomNodes() {
+        int count = scaledRandomIntBetween(1, 100);
+        ArrayList<DiscoveryNode> nodes = new ArrayList<>(count);
+        Map<String, String> master = new HashMap<>();
+        master.put("master", "true");
+        Map<String, String> nonMaster = new HashMap<>();
+        nonMaster.put("master", "false");
+
+        for (int i = 0; i < count; i++) {
+            Map<String, String> attributes = randomBoolean() ? master : nonMaster;
+            nodes.add(new DiscoveryNode("n_" + i, "n_" + i, DummyTransportAddress.INSTANCE, attributes, Version.CURRENT));
+        }
+
+        Collections.shuffle(nodes, getRandom());
+        return nodes;
+    }
+
+    @Test
+    public void sortByMasterLikelihood() {
+        List<DiscoveryNode> nodes = generateRandomNodes();
+        List<DiscoveryNode> sortedNodes = electMasterService().sortByMasterLikelihood(nodes);
+        assertEquals(nodes.size(), sortedNodes.size());
+        DiscoveryNode prevNode = sortedNodes.get(0);
+        for (int i = 1; i < sortedNodes.size(); i++) {
+            DiscoveryNode node = sortedNodes.get(i);
+            if (!prevNode.masterNode()) {
+                // once a non-master node has been seen, no master node may follow
+                assertFalse(node.masterNode());
+            } else if (node.masterNode()) {
+                // consecutive master nodes must be in strictly ascending id order
+                assertTrue(prevNode.id().compareTo(node.id()) < 0);
+            }
+            prevNode = node;
+        }
+    }
+
+    @Test
+    public void electMaster() {
+        List<DiscoveryNode> nodes = generateRandomNodes();
+        ElectMasterService service = electMasterService();
+        int min_master_nodes = randomIntBetween(0, nodes.size());
+        service.minimumMasterNodes(min_master_nodes);
+
+        int master_nodes = 0;
+        for (DiscoveryNode node : nodes) {
+            if (node.masterNode()) {
+                master_nodes++;
+            }
+        }
+        DiscoveryNode master = null;
+        if (service.hasEnoughMasterNodes(nodes)) {
+            master = service.electMaster(nodes);
+        }
+
+        if (master_nodes == 0 || (min_master_nodes > 0 && master_nodes < min_master_nodes)) {
+            assertNull(master);
+        } else {
+            // a missing master must fail as an assertion, not as an NPE in the loop below
+            assertNotNull(master);
+            for (DiscoveryNode node : nodes) {
+                if (node.masterNode()) {
+                    assertTrue(master.id().compareTo(node.id()) <= 0);
+                }
+            }
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryRejoinOnMaster.java b/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryRejoinOnMaster.java
new file mode 100644
index 00000000000..1ee31505d5e
--- /dev/null
+++ b/src/test/java/org/elasticsearch/discovery/zen/ZenDiscoveryRejoinOnMaster.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.discovery.zen;
+
+import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
+import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
+import org.elasticsearch.common.Priority;
+import org.elasticsearch.common.settings.ImmutableSettings;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.discovery.zen.fd.FaultDetection;
+import org.elasticsearch.test.ElasticsearchIntegrationTest;
+import org.junit.Test;
+
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Integration tests checking that the zen discovery rejoin-on-master-gone option can be changed at runtime
+ * and that stopping the elected master does not change the recoveries reported for an existing index.
+ */
+@ElasticsearchIntegrationTest.ClusterScope(scope = ElasticsearchIntegrationTest.Scope.TEST, numDataNodes = 0, numClientNodes = 0)
+public class ZenDiscoveryRejoinOnMaster extends ElasticsearchIntegrationTest {
+
+    @Test
+    public void testChangeRejoinOnMasterOptionIsDynamic() throws Exception {
+        // "zen" overrides the local discovery type in case that one was set externally
+        Settings zenSettings = ImmutableSettings.settingsBuilder().put("discovery.type", "zen").build();
+        String startedNode = internalCluster().startNode(zenSettings);
+        ZenDiscovery discovery = (ZenDiscovery) internalCluster().getInstance(Discovery.class, startedNode);
+        assertThat(discovery.isRejoinOnMasterGone(), is(true));
+
+        // switch the option off through a transient cluster settings update
+        ImmutableSettings.Builder update = ImmutableSettings.builder().put(ZenDiscovery.SETTING_REJOIN_ON_MASTER_GONE, false);
+        client().admin().cluster().prepareUpdateSettings().setTransientSettings(update).get();
+
+        assertThat(discovery.isRejoinOnMasterGone(), is(false));
+    }
+
+    @Test
+    public void testNoShardRelocationsOccurWhenElectedMasterNodeFails() throws Exception {
+        // settings shared by all four nodes: 1s fault detection ping timeout with a single retry
+        Settings commonSettings = ImmutableSettings.builder()
+                .put(FaultDetection.SETTING_PING_TIMEOUT, "1s")
+                .put(FaultDetection.SETTING_PING_RETRIES, "1")
+                .put("discovery.type", "zen")
+                .build();
+        Settings masterOnlySettings = ImmutableSettings.builder()
+                .put("node.data", false).put(commonSettings).build();
+        Settings dataOnlySettings = ImmutableSettings.builder()
+                .put("node.master", false).put(commonSettings).build();
+
+        // two nodes with node.data=false first, then two nodes with node.master=false
+        internalCluster().startNodesAsync(2, masterOnlySettings).get();
+        internalCluster().startNodesAsync(2, dataOnlySettings).get();
+        ClusterHealthResponse health = client().admin().cluster().prepareHealth()
+                .setWaitForEvents(Priority.LANGUID)
+                .setWaitForNodes("4")
+                .setWaitForRelocatingShards(0)
+                .get();
+        assertThat(health.isTimedOut(), is(false));
+
+        createIndex("test");
+        ensureSearchable("test");
+        RecoveryResponse recoveries = client().admin().indices().prepareRecoveries("test").get();
+        int numRecoveriesBeforeNewMaster = recoveries.shardResponses().get("test").size();
+
+        // stop the current master and wait until a different master is reported
+        final String previousMaster = internalCluster().getMasterName();
+        internalCluster().stopCurrentMasterNode();
+        assertBusy(new Runnable() {
+            @Override
+            public void run() {
+                String newMaster = internalCluster().getMasterName();
+                assertThat(newMaster, notNullValue());
+                assertThat(newMaster, not(equalTo(previousMaster)));
+            }
+        });
+        ensureSearchable("test");
+
+        // the number of recoveries reported for the index must be the same as before the switch
+        recoveries = client().admin().indices().prepareRecoveries("test").get();
+        int numRecoveriesAfterNewMaster = recoveries.shardResponses().get("test").size();
+        assertThat(numRecoveriesAfterNewMaster, equalTo(numRecoveriesBeforeNewMaster));
+    }
+}
diff --git a/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java b/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java
index 8f18cb11d38..7ecc23b68ef 100644
--- a/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java
+++ b/src/test/java/org/elasticsearch/discovery/zen/ping/unicast/UnicastZenPingTests.java
@@ -30,6 +30,7 @@ import org.elasticsearch.common.transport.InetSocketTransportAddress;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.discovery.zen.DiscoveryNodesProvider;
+import org.elasticsearch.discovery.zen.elect.ElectMasterService;
 import org.elasticsearch.discovery.zen.ping.ZenPing;
 import org.elasticsearch.node.service.NodeService;
 import org.elasticsearch.test.ElasticsearchTestCase;
@@ -55,6 +56,7 @@ public class UnicastZenPingTests extends ElasticsearchTestCase {
         ThreadPool threadPool = new ThreadPool(getClass().getName());
         ClusterName clusterName = new ClusterName("test");
         NetworkService networkService = new NetworkService(settings);
+        ElectMasterService electMasterService = new ElectMasterService(settings);
 
         NettyTransport transportA = new NettyTransport(settings, threadPool, networkService, BigArrays.NON_RECYCLING_INSTANCE, Version.CURRENT);
         final TransportService transportServiceA = new TransportService(transportA, threadPool).start();
@@ -73,7 +75,7 @@ public class UnicastZenPingTests extends ElasticsearchTestCase {
                 addressB.address().getAddress().getHostAddress() + ":" + addressB.address().getPort())
                 .build();
 
-        UnicastZenPing zenPingA = new UnicastZenPing(hostsSettings, threadPool, transportServiceA, clusterName, Version.CURRENT, null);
+        UnicastZenPing zenPingA = new UnicastZenPing(hostsSettings, threadPool, transportServiceA, clusterName, Version.CURRENT, electMasterService, null);
         zenPingA.setNodesProvider(new DiscoveryNodesProvider() {
             @Override
             public DiscoveryNodes nodes() {
@@ -87,7 +89,7 @@ public class UnicastZenPingTests extends ElasticsearchTestCase {
         });
         zenPingA.start();
 
-        UnicastZenPing zenPingB = new UnicastZenPing(hostsSettings, threadPool, transportServiceB, clusterName, Version.CURRENT, null);
+        UnicastZenPing zenPingB = new UnicastZenPing(hostsSettings, threadPool, transportServiceB, clusterName, Version.CURRENT, electMasterService, null);
         zenPingB.setNodesProvider(new DiscoveryNodesProvider() {
             @Override
             public DiscoveryNodes nodes() {
diff --git a/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java b/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java
index f8fe46cae1f..c7c20b790dd 100644
--- a/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java
+++ b/src/test/java/org/elasticsearch/index/TransportIndexFailuresTest.java
@@ -33,6 +33,7 @@ import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.discovery.Discovery;
 import org.elasticsearch.discovery.DiscoverySettings;
+import org.elasticsearch.discovery.zen.fd.FaultDetection;
 import org.elasticsearch.test.ElasticsearchIntegrationTest;
 import org.elasticsearch.test.junit.annotations.TestLogging;
 import org.elasticsearch.test.transport.MockTransportService;
@@ -54,8 +55,8 @@ public class TransportIndexFailuresTest extends ElasticsearchIntegrationTest {
 
     private static final Settings nodeSettings = ImmutableSettings.settingsBuilder()
             .put("discovery.type", "zen") // <-- To override the local setting if set externally
-            .put("discovery.zen.fd.ping_timeout", "1s") // <-- for hitting simulated network failures quickly
-            .put("discovery.zen.fd.ping_retries", "1") // <-- for hitting simulated network failures quickly
+            .put(FaultDetection.SETTING_PING_TIMEOUT, "1s") // <-- for hitting simulated network failures quickly
+            .put(FaultDetection.SETTING_PING_RETRIES, "1") // <-- for hitting simulated network failures quickly
             .put(DiscoverySettings.PUBLISH_TIMEOUT, "1s") // <-- for hitting simulated network failures quickly
             .put("discovery.zen.minimum_master_nodes", 1)
             .put(TransportModule.TRANSPORT_SERVICE_TYPE_KEY, MockTransportService.class.getName())
diff --git a/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java b/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java
index 26bf890f85b..94121d71a63 100644
--- a/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java
+++ b/src/test/java/org/elasticsearch/recovery/FullRollingRestartTests.java
@@ -30,7 +30,7 @@ import org.elasticsearch.test.junit.annotations.TestLogging;
 import org.junit.Test;
 
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
-import static org.elasticsearch.test.ElasticsearchIntegrationTest.*;
+import static org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
 
 /**
@@ -54,7 +54,7 @@ public class FullRollingRestartTests extends ElasticsearchIntegrationTest {
 
     @Test
     @Slow
-    @TestLogging("indices.cluster:TRACE,cluster.service:TRACE")
+    @TestLogging("indices.cluster:TRACE,cluster.service:TRACE,action.search:TRACE,indices.recovery:TRACE")
     public void testFullRollingRestart() throws Exception {
         internalCluster().startNode();
         createIndex("test");
diff --git a/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java b/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java
index ca2f8a5b050..ff4512b4113 100644
--- a/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java
+++ b/src/test/java/org/elasticsearch/recovery/RecoveryWhileUnderLoadTests.java
@@ -43,7 +43,6 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF
 import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout;
 import static org.hamcrest.Matchers.equalTo;
 
 public class RecoveryWhileUnderLoadTests extends ElasticsearchIntegrationTest {
diff --git a/src/test/java/org/elasticsearch/test/BackgroundIndexer.java b/src/test/java/org/elasticsearch/test/BackgroundIndexer.java
index 29184b89768..2cafcef5d9f 100644
--- a/src/test/java/org/elasticsearch/test/BackgroundIndexer.java
+++ b/src/test/java/org/elasticsearch/test/BackgroundIndexer.java
@@ -217,7 +217,7 @@ public class BackgroundIndexer implements AutoCloseable {
         setBudget(numOfDocs);
     }
 
-    /** Stop all background threads **/
+    /** Stop all background threads. */
     public void stop() throws InterruptedException {
         if (stop.get()) {
             return;
diff --git a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java
index f7062a94994..9ed53f2fd51 100644
--- a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java
+++ b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java
@@ -97,6 +97,7 @@ import org.elasticsearch.rest.RestStatus;
 import org.elasticsearch.script.ScriptService;
 import org.elasticsearch.search.SearchService;
 import org.elasticsearch.test.client.RandomizingClient;
+import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
 import org.hamcrest.Matchers;
 import org.junit.*;
 
@@ -581,6 +582,7 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
         boolean success = false;
         try {
             logger.info("[{}#{}]: cleaning up after test", getTestClass().getSimpleName(), getTestName());
+            clearDisruptionScheme();
             final Scope currentClusterScope = getCurrentClusterScope();
             try {
                 if (currentClusterScope != Scope.TEST) {
@@ -644,6 +646,13 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
     }
 
     public static Client client() {
+        return client(null);
+    }
+
+    public static Client client(@Nullable String node) {
+        if (node != null) {
+            return internalCluster().client(node);
+        }
         Client client = cluster().client();
         if (frequently()) {
             client = new RandomizingClient(client, getRandom());
@@ -689,6 +698,15 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
         return between(minimumNumberOfReplicas(), maximumNumberOfReplicas());
     }
 
+    // Disruption scheme hooks: both methods delegate to the internal test cluster.
+    public void setDisruptionScheme(ServiceDisruptionScheme scheme) {
+        internalCluster().setDisruptionScheme(scheme);
+    }
+
+    public void clearDisruptionScheme() {
+        internalCluster().clearDisruptionScheme();
+    }
+
     /**
      * Returns a settings object used in {@link #createIndex(String...)} and {@link #prepareCreate(String)} and friends.
      * This method can be overwritten by subclasses to set defaults for the indices that are created by the test.
@@ -889,7 +907,7 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase
      * It is useful to ensure that all action on the cluster have finished and all shards that were currently relocating
      * are now allocated and started.
      */
-    public ClusterHealthStatus ensureGreen(String... indices) {
+    public ClusterHealthStatus  ensureGreen(String... indices) {
         ClusterHealthResponse actionGet = client().admin().cluster()
                 .health(Requests.clusterHealthRequest(indices).waitForGreenStatus().waitForEvents(Priority.LANGUID).waitForRelocatingShards(0)).actionGet();
         if (actionGet.isTimedOut()) {
diff --git a/src/test/java/org/elasticsearch/test/InternalTestCluster.java b/src/test/java/org/elasticsearch/test/InternalTestCluster.java
index c28d4e812f5..56cf4b8851f 100644
--- a/src/test/java/org/elasticsearch/test/InternalTestCluster.java
+++ b/src/test/java/org/elasticsearch/test/InternalTestCluster.java
@@ -76,6 +76,7 @@ import org.elasticsearch.plugins.PluginsService;
 import org.elasticsearch.search.SearchService;
 import org.elasticsearch.test.cache.recycler.MockBigArraysModule;
 import org.elasticsearch.test.cache.recycler.MockPageCacheRecyclerModule;
+import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
 import org.elasticsearch.test.engine.MockEngineModule;
 import org.elasticsearch.test.store.MockFSIndexStoreModule;
 import org.elasticsearch.test.transport.AssertingLocalTransport;
@@ -106,6 +107,7 @@ import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
 import static org.elasticsearch.test.ElasticsearchTestCase.assertBusy;
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoTimeout;
 import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
 
 /**
@@ -150,7 +152,7 @@ public final class InternalTestCluster extends TestCluster {
 
     static final boolean DEFAULT_ENABLE_RANDOM_BENCH_NODES = true;
 
-    static final String NODE_MODE = nodeMode();
+    public static final String NODE_MODE = nodeMode();
 
     /* sorted map to make traverse order reproducible, concurrent since we do checks on it not within a sync block */
     private final NavigableMap<String, NodeAndClient> nodes = new TreeMap<>();
@@ -187,6 +189,7 @@ public final class InternalTestCluster extends TestCluster {
      */
     private final String nodePrefix;
 
+    private ServiceDisruptionScheme activeDisruptionScheme;
 
     public InternalTestCluster(long clusterSeed, int minNumDataNodes, int maxNumDataNodes, String clusterName, int numClientNodes, boolean enableRandomBenchNodes,
                                int jvmOrdinal, String nodePrefix) {
@@ -222,7 +225,7 @@ public final class InternalTestCluster extends TestCluster {
                 this.numSharedClientNodes = numClientNodes;
             }
         }
-        assert this.numSharedClientNodes >=0;
+        assert this.numSharedClientNodes >= 0;
 
         this.enableRandomBenchNodes = enableRandomBenchNodes;
 
@@ -251,7 +254,7 @@ public final class InternalTestCluster extends TestCluster {
             if (numOfDataPaths > 0) {
                 StringBuilder dataPath = new StringBuilder();
                 for (int i = 0; i < numOfDataPaths; i++) {
-                    dataPath.append(new File("data/d"+i).getAbsolutePath()).append(',');
+                    dataPath.append(new File("data/d" + i).getAbsolutePath()).append(',');
                 }
                 builder.put("path.data", dataPath.toString());
             }
@@ -275,7 +278,7 @@ public final class InternalTestCluster extends TestCluster {
 
     public static String nodeMode() {
         Builder builder = ImmutableSettings.builder();
-        if (Strings.isEmpty(System.getProperty("es.node.mode"))&& Strings.isEmpty(System.getProperty("es.node.local"))) {
+        if (Strings.isEmpty(System.getProperty("es.node.mode")) && Strings.isEmpty(System.getProperty("es.node.local"))) {
             return "local"; // default if nothing is specified
         }
         if (Strings.hasLength(System.getProperty("es.node.mode"))) {
@@ -296,6 +299,10 @@ public final class InternalTestCluster extends TestCluster {
         return clusterName;
     }
 
+    public String[] getNodeNames() {
+        return nodes.keySet().toArray(Strings.EMPTY_ARRAY); // keys of the sorted nodes map, i.e. node names in ascending order
+    }
+
     private static boolean isLocalTransportConfigured() {
         if ("local".equals(System.getProperty("es.node.mode", "network"))) {
             return true;
@@ -328,7 +335,7 @@ public final class InternalTestCluster extends TestCluster {
                 //.put("index.store.type", random.nextInt(10) == 0 ? MockRamIndexStoreModule.class.getName() : MockFSIndexStoreModule.class.getName())
                 // decrease the routing schedule so new nodes will be added quickly - some random value between 30 and 80 ms
                 .put("cluster.routing.schedule", (30 + random.nextInt(50)) + "ms")
-                // default to non gateway
+                        // default to non gateway
                 .put("gateway.type", "none")
                 .put(SETTING_CLUSTER_NODE_SEED, seed);
         if (ENABLE_MOCK_MODULES && usually(random)) {
@@ -352,7 +359,7 @@ public final class InternalTestCluster extends TestCluster {
             builder.put(SearchService.KEEPALIVE_INTERVAL_KEY, TimeValue.timeValueSeconds(10 + random.nextInt(5 * 60)));
         }
         if (random.nextBoolean()) { // sometimes set a
-            builder.put(SearchService.DEFAUTL_KEEPALIVE_KEY, TimeValue.timeValueSeconds(100 + random.nextInt(5*60)));
+            builder.put(SearchService.DEFAUTL_KEEPALIVE_KEY, TimeValue.timeValueSeconds(100 + random.nextInt(5 * 60)));
         }
         if (random.nextBoolean()) {
             // change threadpool types to make sure we don't have components that rely on the type of thread pools
@@ -493,6 +500,7 @@ public final class InternalTestCluster extends TestCluster {
         while (limit.hasNext()) {
             NodeAndClient next = limit.next();
             nodesToRemove.add(next);
+            removeDistruptionSchemeFromNode(next);
             next.close();
         }
         for (NodeAndClient toRemove : nodesToRemove) {
@@ -667,6 +675,10 @@ public final class InternalTestCluster extends TestCluster {
     @Override
     public void close() {
         if (this.open.compareAndSet(true, false)) {
+            if (activeDisruptionScheme != null) {
+                activeDisruptionScheme.testClusterClosed();
+                activeDisruptionScheme = null;
+            }
             IOUtils.closeWhileHandlingException(nodes.values());
             nodes.clear();
             executor.shutdownNow();
@@ -777,7 +789,6 @@ public final class InternalTestCluster extends TestCluster {
 
     public static final String TRANSPORT_CLIENT_PREFIX = "transport_client_";
     static class TransportClientFactory {
-
         private static TransportClientFactory NO_SNIFF_CLIENT_FACTORY = new TransportClientFactory(false, ImmutableSettings.EMPTY);
         private static TransportClientFactory SNIFF_CLIENT_FACTORY = new TransportClientFactory(true, ImmutableSettings.EMPTY);
 
@@ -831,10 +842,6 @@ public final class InternalTestCluster extends TestCluster {
     }
 
     private synchronized void reset(boolean wipeData) throws IOException {
-        randomlyResetClients();
-        if (wipeData) {
-            wipeDataDirectories();
-        }
         // clear all rules for mock transport services
         for (NodeAndClient nodeAndClient : nodes.values()) {
             TransportService transportService = nodeAndClient.node.injector().getInstance(TransportService.class);
@@ -842,6 +849,10 @@ public final class InternalTestCluster extends TestCluster {
                 ((MockTransportService) transportService).clearAllRules();
             }
         }
+        randomlyResetClients();
+        if (wipeData) {
+            wipeDataDirectories();
+        }
         if (nextNodeId.get() == sharedNodesSeeds.length && nodes.size() == sharedNodesSeeds.length) {
             logger.debug("Cluster hasn't changed - moving out - nodes: [{}] nextNodeId: [{}] numSharedNodes: [{}]", nodes.keySet(), nextNodeId.get(), sharedNodesSeeds.length);
             return;
@@ -1030,6 +1041,7 @@ public final class InternalTestCluster extends TestCluster {
         NodeAndClient nodeAndClient = getRandomNodeAndClient(new DataNodePredicate());
         if (nodeAndClient != null) {
             logger.info("Closing random node [{}] ", nodeAndClient.name);
+            removeDistruptionSchemeFromNode(nodeAndClient);
             nodes.remove(nodeAndClient.name);
             nodeAndClient.close();
         }
@@ -1049,6 +1061,7 @@ public final class InternalTestCluster extends TestCluster {
         });
         if (nodeAndClient != null) {
             logger.info("Closing filtered random node [{}] ", nodeAndClient.name);
+            removeDistruptionSchemeFromNode(nodeAndClient);
             nodes.remove(nodeAndClient.name);
             nodeAndClient.close();
         }
@@ -1063,6 +1076,7 @@ public final class InternalTestCluster extends TestCluster {
         String masterNodeName = getMasterName();
         assert nodes.containsKey(masterNodeName);
         logger.info("Closing master node [{}] ", masterNodeName);
+        removeDistruptionSchemeFromNode(nodes.get(masterNodeName));
         NodeAndClient remove = nodes.remove(masterNodeName);
         remove.close();
     }
@@ -1074,6 +1088,7 @@ public final class InternalTestCluster extends TestCluster {
         NodeAndClient nodeAndClient = getRandomNodeAndClient(Predicates.not(new MasterNodePredicate(getMasterName())));
         if (nodeAndClient != null) {
             logger.info("Closing random non master node [{}] current master [{}] ", nodeAndClient.name, getMasterName());
+            removeDistruptionSchemeFromNode(nodeAndClient);
             nodes.remove(nodeAndClient.name);
             nodeAndClient.close();
         }
@@ -1127,6 +1142,9 @@ public final class InternalTestCluster extends TestCluster {
                 if (!callback.doRestart(nodeAndClient.name)) {
                     logger.info("Closing node [{}] during restart", nodeAndClient.name);
                     toRemove.add(nodeAndClient);
+                    if (activeDisruptionScheme != null) {
+                        activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
+                    }
                     nodeAndClient.close();
                 }
             }
@@ -1141,18 +1159,33 @@ public final class InternalTestCluster extends TestCluster {
             for (NodeAndClient nodeAndClient : nodes.values()) {
                 callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient());
                 logger.info("Restarting node [{}] ", nodeAndClient.name);
+                if (activeDisruptionScheme != null) {
+                    activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
+                }
                 nodeAndClient.restart(callback);
+                if (activeDisruptionScheme != null) {
+                    activeDisruptionScheme.applyToNode(nodeAndClient.name, this);
+                }
             }
         } else {
             int numNodesRestarted = 0;
             for (NodeAndClient nodeAndClient : nodes.values()) {
                 callback.doAfterNodes(numNodesRestarted++, nodeAndClient.nodeClient());
                 logger.info("Stopping node [{}] ", nodeAndClient.name);
+                if (activeDisruptionScheme != null) {
+                    activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
+                }
                 nodeAndClient.node.close();
             }
             for (NodeAndClient nodeAndClient : nodes.values()) {
                 logger.info("Starting node [{}] ", nodeAndClient.name);
+                if (activeDisruptionScheme != null) {
+                    activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
+                }
                 nodeAndClient.restart(callback);
+                if (activeDisruptionScheme != null) {
+                    activeDisruptionScheme.applyToNode(nodeAndClient.name, this);
+                }
             }
         }
     }
@@ -1193,7 +1226,10 @@ public final class InternalTestCluster extends TestCluster {
     }
 
 
-    private String getMasterName() {
+    /**
+     * get the name of the current master node
+     */
+    public String getMasterName() {
         try {
             ClusterState state = client().admin().cluster().prepareState().execute().actionGet().getState();
             return state.nodes().masterNode().name();
@@ -1350,6 +1386,7 @@ public final class InternalTestCluster extends TestCluster {
             dataDirToClean.addAll(Arrays.asList(nodeEnv.nodeDataLocations()));
         }
         nodes.put(nodeAndClient.name, nodeAndClient);
+        applyDisruptionSchemeToNode(nodeAndClient);
     }
 
     public void closeNonSharedNodes(boolean wipeData) throws IOException {
@@ -1371,6 +1408,48 @@ public final class InternalTestCluster extends TestCluster {
         return hasFilterCache;
     }
 
+    public void setDisruptionScheme(ServiceDisruptionScheme scheme) { // makes scheme the cluster's active disruption scheme
+        clearDisruptionScheme(); // first remove whatever scheme is currently active, if any
+        scheme.applyToCluster(this);
+        activeDisruptionScheme = scheme; // recorded only after applyToCluster has returned
+    }
+
+    public void clearDisruptionScheme() { // removes the active scheme (if any) and waits for the cluster to re-form
+        if (activeDisruptionScheme != null) {
+            TimeValue expectedHealingTime = activeDisruptionScheme.expectedTimeToHeal();
+            logger.info("Clearing active scheme {}, expected healing time {}", activeDisruptionScheme, expectedHealingTime);
+            activeDisruptionScheme.removeFromCluster(this);
+            // We don't know which scheme was picked. Certain schemes don't partition the cluster but only make it process
+            // slowly, so we need to sleep: cluster health alone doesn't verify that these schemes have been cleared.
+            if (expectedHealingTime != null && expectedHealingTime.millis() > 0) {
+                try {
+                    Thread.sleep(expectedHealingTime.millis());
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt(); // keep the interrupt flag set and go on to the health check
+                }
+            }
+            assertFalse("cluster failed to form after disruption was healed", client().admin().cluster().prepareHealth()
+                    .setWaitForNodes("" + nodes.size())
+                    .setWaitForRelocatingShards(0)
+                    .get().isTimedOut());
+        }
+        activeDisruptionScheme = null; // not reached if the assertion above fails
+    }
+
+    private void applyDisruptionSchemeToNode(NodeAndClient nodeAndClient) { // does nothing when no scheme is active
+        if (activeDisruptionScheme != null) {
+            assert nodes.containsKey(nodeAndClient.name); // the node must already be registered in nodes
+            activeDisruptionScheme.applyToNode(nodeAndClient.name, this);
+        }
+    }
+
+    private void removeDistruptionSchemeFromNode(NodeAndClient nodeAndClient) { // does nothing when no scheme is active
+        if (activeDisruptionScheme != null) {
+            assert nodes.containsKey(nodeAndClient.name); // must be called before the node is dropped from nodes
+            activeDisruptionScheme.removeFromNode(nodeAndClient.name, this);
+        }
+    }
+
     private synchronized Collection<NodeAndClient> dataNodeAndClients() {
         return Collections2.filter(nodes.values(), new DataNodePredicate());
     }
diff --git a/src/test/java/org/elasticsearch/test/SettingsSource.java b/src/test/java/org/elasticsearch/test/SettingsSource.java
index 8829885bf7b..6341d842d67 100644
--- a/src/test/java/org/elasticsearch/test/SettingsSource.java
+++ b/src/test/java/org/elasticsearch/test/SettingsSource.java
@@ -20,7 +20,7 @@ package org.elasticsearch.test;
 
 import org.elasticsearch.common.settings.Settings;
 
-abstract class SettingsSource {
+public abstract class SettingsSource {
 
     public static final SettingsSource EMPTY = new SettingsSource() {
         @Override
@@ -35,7 +35,7 @@ abstract class SettingsSource {
     };
 
     /**
-     * @return  the settings for the node represented by the given ordinal, or {@code null} if there are no settings defined
+     * @return the settings for the node represented by the given ordinal, or {@code null} if there are no settings defined
      */
     public abstract Settings node(int nodeOrdinal);
 
diff --git a/src/test/java/org/elasticsearch/test/disruption/LongGCDisruption.java b/src/test/java/org/elasticsearch/test/disruption/LongGCDisruption.java
new file mode 100644
index 00000000000..d2fa09cb7dd
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/LongGCDisruption.java
@@ -0,0 +1,177 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.common.unit.TimeValue;
+
+import java.util.HashSet;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.regex.Pattern;
+
+public class LongGCDisruption extends SingleNodeDisruption {
+    // Simulates long GC pauses on the disrupted node by suspending, and later resuming, the threads named after it.
+
+    volatile boolean disrupting; // loop flag for the background worker
+    volatile Thread worker; // the running background worker, or null when none was started / it was stopped
+
+    final long intervalBetweenDelaysMin;
+    final long intervalBetweenDelaysMax;
+    final long delayDurationMin;
+    final long delayDurationMax;
+
+    public LongGCDisruption(Random random) {
+        this(null, random); // no disrupted node chosen yet
+    }
+
+    public LongGCDisruption(String disruptedNode, Random random) {
+        this(disruptedNode, random, 100, 200, 300, 20000); // default interval and delay ranges (used as milliseconds)
+    }
+
+    public LongGCDisruption(String disruptedNode, Random random, long intervalBetweenDelaysMin,
+                            long intervalBetweenDelaysMax, long delayDurationMin, long delayDurationMax) {
+        this(random, intervalBetweenDelaysMin, intervalBetweenDelaysMax, delayDurationMin, delayDurationMax);
+        this.disruptedNode = disruptedNode;
+    }
+
+    public LongGCDisruption(Random random,
+                            long intervalBetweenDelaysMin, long intervalBetweenDelaysMax, long delayDurationMin,
+                            long delayDurationMax) {
+        super(random);
+        this.intervalBetweenDelaysMin = intervalBetweenDelaysMin;
+        this.intervalBetweenDelaysMax = intervalBetweenDelaysMax;
+        this.delayDurationMin = delayDurationMin;
+        this.delayDurationMax = delayDurationMax;
+    }
+
+    final static AtomicInteger thread_ids = new AtomicInteger(); // gives every worker thread a unique name suffix
+
+    @Override
+    public void startDisrupting() {
+        disrupting = true;
+        worker = new Thread(new BackgroundWorker(), "long_gc_simulation_" + thread_ids.incrementAndGet());
+        worker.setDaemon(true);
+        worker.start();
+    }
+
+    @Override
+    public void stopDisrupting() {
+        if (worker == null) {
+            return;
+        }
+        logger.info("stopping long GCs on [{}]", disruptedNode);
+        disrupting = false;
+        worker.interrupt(); // wakes the worker from any sleep so that it re-checks the disrupting flag
+        try {
+            worker.join(2 * (intervalBetweenDelaysMax + delayDurationMax)); // bounded wait, never blocks forever
+        } catch (InterruptedException e) {
+            logger.info("background thread failed to stop");
+            Thread.currentThread().interrupt(); // do not hide the interruption from our caller
+        }
+        worker = null;
+    }
+
+    final static Pattern[] unsafeClasses = new Pattern[]{
+            // logging has shared JVM locks - we may suspend a thread and block other nodes from doing their thing
+            Pattern.compile("Logger")
+    };
+
+    /** Suspends the node's live threads not yet in nodeThreads; returns true if this pass suspended any thread. */
+    private boolean stopNodeThreads(String node, Set<Thread> nodeThreads) {
+        Set<Thread> allThreadsSet = Thread.getAllStackTraces().keySet();
+        boolean stopped = false;
+        final String nodeThreadNamePart = "[" + node + "]"; // a thread belongs to the node if its name contains this
+        for (Thread thread : allThreadsSet) {
+            String name = thread.getName();
+            if (name.contains(nodeThreadNamePart)) {
+                if (thread.isAlive() && nodeThreads.add(thread)) {
+                    stopped = true; // stays true even if the thread is released below, so the caller makes another pass
+                    thread.suspend();
+                    // double check the thread is not in a shared resource like logging. If so, let it go and come back..
+                    boolean safe = true;
+                    safe:
+                    for (StackTraceElement stackElement : thread.getStackTrace()) {
+                        String className = stackElement.getClassName();
+                        for (Pattern unsafePattern : unsafeClasses) {
+                            if (unsafePattern.matcher(className).find()) {
+                                safe = false;
+                                break safe;
+                            }
+                        }
+                    }
+                    if (!safe) {
+                        thread.resume();
+                        nodeThreads.remove(thread); // forget it so that a later pass can try to suspend it again
+                    }
+                }
+            }
+        }
+        return stopped;
+    }
+
+    private void resumeThreads(Set<Thread> threads) {
+        for (Thread thread : threads) {
+            thread.resume();
+        }
+    }
+
+    /** Suspends the disrupted node's threads, sleeps for duration if any were suspended, and always resumes them. */
+    private void simulateLongGC(final TimeValue duration) throws InterruptedException {
+        final String disruptionNodeCopy = disruptedNode; // work on one snapshot of the field
+        if (disruptionNodeCopy == null) {
+            return;
+        }
+        logger.info("node [{}] goes into GC for for [{}]", disruptionNodeCopy, duration);
+        final Set<Thread> nodeThreads = new HashSet<>();
+        try {
+            while (stopNodeThreads(disruptionNodeCopy, nodeThreads)) ; // repeat until a pass suspends nothing new
+            if (!nodeThreads.isEmpty()) {
+                Thread.sleep(duration.millis());
+            }
+        } finally {
+            logger.info("node [{}] resumes from GC", disruptionNodeCopy);
+            resumeThreads(nodeThreads); // runs on interruption too, so no thread is left suspended
+        }
+    }
+
+    @Override
+    public TimeValue expectedTimeToHeal() {
+        return TimeValue.timeValueMillis(0);
+    }
+
+    class BackgroundWorker implements Runnable { // alternates simulated GC pauses and quiet intervals until stopped
+        @Override
+        public void run() {
+            while (disrupting && disruptedNode != null) {
+                try { // Math.max(1, ...) below keeps nextInt's bound positive when a range's min equals its max
+                    TimeValue duration = new TimeValue(delayDurationMin + random.nextInt(Math.max(1, (int) (delayDurationMax - delayDurationMin))));
+                    simulateLongGC(duration);
+                    duration = new TimeValue(intervalBetweenDelaysMin + random.nextInt(Math.max(1, (int) (intervalBetweenDelaysMax - intervalBetweenDelaysMin))));
+                    if (disrupting && disruptedNode != null) {
+                        Thread.sleep(duration.millis());
+                    }
+                } catch (InterruptedException e) { // sent by stopDisrupting(); the loop condition decides whether to exit
+                } catch (Exception e) {
+                    logger.error("error in background worker", e);
+                }
+            }
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkDelaysPartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkDelaysPartition.java
new file mode 100644
index 00000000000..9eb99302e46
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/NetworkDelaysPartition.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.test.transport.MockTransportService;
+
+import java.util.Random;
+import java.util.Set;
+
+public class NetworkDelaysPartition extends NetworkPartition {
+    // Partition whose unresponsive rules carry a duration picked at random from [delayMin, delayMax) on every start.
+
+    static long DEFAULT_DELAY_MIN = 10000;
+    static long DEFAULT_DELAY_MAX = 90000;
+
+    final long delayMin;
+    final long delayMax;
+
+    TimeValue duration; // chosen in startDisrupting(); null until then
+
+    public NetworkDelaysPartition(Random random) {
+        this(random, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX);
+    }
+
+    public NetworkDelaysPartition(Random random, long delayMin, long delayMax) {
+        super(random);
+        this.delayMin = delayMin;
+        this.delayMax = delayMax;
+    }
+
+    public NetworkDelaysPartition(String node1, String node2, Random random) {
+        this(node1, node2, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX, random);
+    }
+
+    public NetworkDelaysPartition(String node1, String node2, long delayMin, long delayMax, Random random) {
+        super(node1, node2, random);
+        this.delayMin = delayMin;
+        this.delayMax = delayMax;
+    }
+
+    public NetworkDelaysPartition(Set<String> nodesSideOne, Set<String> nodesSideTwo, Random random) {
+        this(nodesSideOne, nodesSideTwo, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX, random);
+    }
+
+    public NetworkDelaysPartition(Set<String> nodesSideOne, Set<String> nodesSideTwo, long delayMin, long delayMax, Random random) {
+        super(nodesSideOne, nodesSideTwo, random);
+        this.delayMin = delayMin;
+        this.delayMax = delayMax;
+    }
+
+    @Override
+    public synchronized void startDisrupting() {
+        duration = new TimeValue(delayMin + random.nextInt(Math.max(1, (int) (delayMax - delayMin)))); // bound stays positive if min == max
+        super.startDisrupting();
+    }
+
+    @Override
+    void applyDisruption(DiscoveryNode node1, MockTransportService transportService1,
+                         DiscoveryNode node2, MockTransportService transportService2) {
+        // One rule per direction, each on the sender's own transport: exactly the pair that removeDisruption() clears.
+        transportService1.addUnresponsiveRule(node2, duration);
+        transportService2.addUnresponsiveRule(node1, duration);
+    }
+
+    @Override
+    protected String getPartitionDescription() {
+        return "network delays for [" + duration + "]";
+    }
+
+    @Override
+    public TimeValue expectedTimeToHeal() {
+        return TimeValue.timeValueMillis(delayMax); // upper bound of any duration this scheme can have picked
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkDisconnectPartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkDisconnectPartition.java
new file mode 100644
index 00000000000..8653b50f749
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/NetworkDisconnectPartition.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.test.transport.MockTransportService;
+
+import java.util.Random;
+import java.util.Set;
+
+public class NetworkDisconnectPartition extends NetworkPartition {
+    // Partition that installs a fail-to-send / no-connect rule in both directions between the two sides.
+
+    public NetworkDisconnectPartition(Random random) {
+        super(random);
+    }
+
+    public NetworkDisconnectPartition(String node1, String node2, Random random) {
+        super(node1, node2, random);
+    }
+
+    public NetworkDisconnectPartition(Set<String> nodesSideOne, Set<String> nodesSideTwo, Random random) {
+        super(nodesSideOne, nodesSideTwo, random);
+    }
+
+    @Override
+    protected String getPartitionDescription() {
+        return "disconnected";
+    }
+
+    @Override
+    void applyDisruption(DiscoveryNode node1, MockTransportService transportService1,
+                         DiscoveryNode node2, MockTransportService transportService2) {
+        transportService1.addFailToSendNoConnectRule(node2); // node1's transport -> node2
+        transportService2.addFailToSendNoConnectRule(node1); // node2's transport -> node1
+    }
+
+    @Override
+    public TimeValue expectedTimeToHeal() {
+        return TimeValue.timeValueSeconds(0); // zero: this scheme asks for no extra healing wait
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkPartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkPartition.java
new file mode 100644
index 00000000000..8206fafef4e
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/NetworkPartition.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import com.google.common.collect.ImmutableList;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.discovery.Discovery;
+import org.elasticsearch.test.InternalTestCluster;
+import org.elasticsearch.test.transport.MockTransportService;
+import org.elasticsearch.transport.TransportService;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+
+public abstract class NetworkPartition implements ServiceDisruptionScheme {
+    // Base class for schemes that split nodes into two sides and disrupt the transport between every cross-side pair.
+
+    protected final ESLogger logger = Loggers.getLogger(getClass());
+
+    final Set<String> nodesSideOne;
+    final Set<String> nodesSideTwo;
+    volatile boolean autoExpand; // when true, nodes passed to applyToNode are assigned to a side automatically
+    protected final Random random;
+    protected volatile InternalTestCluster cluster; // set by applyToCluster; used to resolve per-node services
+    protected volatile boolean activeDisruption = false;
+
+    public NetworkPartition(Random random) {
+        this.random = new Random(random.nextLong()); // private generator seeded from the caller's one
+        nodesSideOne = new HashSet<>();
+        nodesSideTwo = new HashSet<>();
+        autoExpand = true;
+    }
+
+    public NetworkPartition(String node1, String node2, Random random) {
+        this(random);
+        nodesSideOne.add(node1);
+        nodesSideTwo.add(node2);
+        autoExpand = false; // sides are fixed by the caller
+    }
+
+    public NetworkPartition(Set<String> nodesSideOne, Set<String> nodesSideTwo, Random random) {
+        this(random);
+        this.nodesSideOne.addAll(nodesSideOne);
+        this.nodesSideTwo.addAll(nodesSideTwo);
+        autoExpand = false; // sides are fixed by the caller
+    }
+
+    public List<String> getNodesSideOne() {
+        return ImmutableList.copyOf(nodesSideOne);
+    }
+
+    public List<String> getNodesSideTwo() {
+        return ImmutableList.copyOf(nodesSideTwo);
+    }
+
+    /** Returns the larger side; side one when both sides have the same size. */
+    public List<String> getMajoritySide() {
+        if (nodesSideOne.size() >= nodesSideTwo.size()) {
+            return getNodesSideOne();
+        } else {
+            return getNodesSideTwo();
+        }
+    }
+
+    public List<String> getMinoritySide() { // the smaller side; side two when both sides have the same size
+        if (nodesSideOne.size() >= nodesSideTwo.size()) {
+            return getNodesSideTwo();
+        } else {
+            return getNodesSideOne();
+        }
+    }
+
+    @Override
+    public void applyToCluster(InternalTestCluster cluster) {
+        this.cluster = cluster;
+        if (autoExpand) {
+            for (String node : cluster.getNodeNames()) {
+                applyToNode(node, cluster);
+            }
+        }
+    }
+
+    @Override
+    public void removeFromCluster(InternalTestCluster cluster) {
+        stopDisrupting();
+    }
+
+    /** In auto-expand mode, assigns an unknown node to a side: an empty side first, otherwise a random one. */
+    @Override
+    public synchronized void applyToNode(String node, InternalTestCluster cluster) {
+        if (!autoExpand || nodesSideOne.contains(node) || nodesSideTwo.contains(node)) {
+            return;
+        }
+        if (nodesSideOne.isEmpty()) {
+            nodesSideOne.add(node);
+        } else if (nodesSideTwo.isEmpty()) {
+            nodesSideTwo.add(node);
+        } else if (random.nextBoolean()) {
+            nodesSideOne.add(node);
+        } else {
+            nodesSideTwo.add(node);
+        }
+    }
+
+    @Override
+    public synchronized void removeFromNode(String node, InternalTestCluster cluster) {
+        Set<String> otherSideNodes;
+        if (nodesSideOne.contains(node)) {
+            otherSideNodes = nodesSideTwo;
+        } else if (nodesSideTwo.contains(node)) {
+            otherSideNodes = nodesSideOne;
+        } else {
+            return; // not part of this partition: nothing to clear and nothing to look up
+        }
+        // Resolve the node's services only once it is known to take part. It stays registered in its side set.
+        MockTransportService transportService = (MockTransportService) cluster.getInstance(TransportService.class, node);
+        DiscoveryNode discoveryNode = discoveryNode(node);
+        for (String node2 : otherSideNodes) {
+            MockTransportService transportService2 = (MockTransportService) cluster.getInstance(TransportService.class, node2);
+            DiscoveryNode discoveryNode2 = discoveryNode(node2);
+            removeDisruption(discoveryNode, transportService, discoveryNode2, transportService2);
+        }
+    }
+
+    @Override
+    public synchronized void testClusterClosed() {
+        // intentionally empty
+    }
+
+    protected abstract String getPartitionDescription();
+
+    protected DiscoveryNode discoveryNode(String node) {
+        return cluster.getInstance(Discovery.class, node).localNode(); // uses the cluster stored by applyToCluster
+    }
+
+    /** Calls applyDisruption for every (side one, side two) node pair; does nothing while either side is empty. */
+    @Override
+    public synchronized void startDisrupting() {
+        if (nodesSideOne.size() == 0 || nodesSideTwo.size() == 0) {
+            return;
+        }
+        logger.info("nodes {} will be partitioned from {}. partition type [{}]", nodesSideOne, nodesSideTwo, getPartitionDescription());
+        activeDisruption = true;
+        for (String node1 : nodesSideOne) {
+            MockTransportService transportService1 = (MockTransportService) cluster.getInstance(TransportService.class, node1);
+            DiscoveryNode discoveryNode1 = discoveryNode(node1);
+            for (String node2 : nodesSideTwo) {
+                DiscoveryNode discoveryNode2 = discoveryNode(node2);
+                MockTransportService transportService2 = (MockTransportService) cluster.getInstance(TransportService.class, node2);
+                applyDisruption(discoveryNode1, transportService1, discoveryNode2, transportService2);
+            }
+        }
+    }
+
+    /** Calls removeDisruption for every node pair; does nothing if a side is empty or no disruption is active. */
+    @Override
+    public synchronized void stopDisrupting() {
+        if (nodesSideOne.size() == 0 || nodesSideTwo.size() == 0 || !activeDisruption) {
+            return;
+        }
+        logger.info("restoring partition between nodes {} & nodes {}", nodesSideOne, nodesSideTwo);
+        for (String node1 : nodesSideOne) {
+            MockTransportService transportService1 = (MockTransportService) cluster.getInstance(TransportService.class, node1);
+            DiscoveryNode discoveryNode1 = discoveryNode(node1);
+            for (String node2 : nodesSideTwo) {
+                DiscoveryNode discoveryNode2 = discoveryNode(node2);
+                MockTransportService transportService2 = (MockTransportService) cluster.getInstance(TransportService.class, node2);
+                removeDisruption(discoveryNode1, transportService1, discoveryNode2, transportService2);
+            }
+        }
+        activeDisruption = false;
+    }
+
+    abstract void applyDisruption(DiscoveryNode node1, MockTransportService transportService1,
+                                  DiscoveryNode node2, MockTransportService transportService2);
+
+    protected void removeDisruption(DiscoveryNode node1, MockTransportService transportService1,
+                                    DiscoveryNode node2, MockTransportService transportService2) {
+        transportService1.clearRule(node2); // node1's transport: drop its rule for node2
+        transportService2.clearRule(node1); // node2's transport: drop its rule for node1
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/NetworkUnresponsivePartition.java b/src/test/java/org/elasticsearch/test/disruption/NetworkUnresponsivePartition.java
new file mode 100644
index 00000000000..1feb56c46c7
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/NetworkUnresponsivePartition.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.test.transport.MockTransportService;
+
+import java.util.Random;
+import java.util.Set;
+
+public class NetworkUnresponsivePartition extends NetworkPartition { // installs unresponsive rules in both directions
+
+    public NetworkUnresponsivePartition(Random random) {
+        super(random);
+    }
+
+    public NetworkUnresponsivePartition(String node1, String node2, Random random) {
+        super(node1, node2, random);
+    }
+
+    public NetworkUnresponsivePartition(Set<String> nodesSideOne, Set<String> nodesSideTwo, Random random) {
+        super(nodesSideOne, nodesSideTwo, random);
+    }
+
+    @Override
+    protected String getPartitionDescription() {
+        return "unresponsive";
+    }
+
+    @Override
+    void applyDisruption(DiscoveryNode node1, MockTransportService transportService1,
+                         DiscoveryNode node2, MockTransportService transportService2) {
+        transportService1.addUnresponsiveRule(node2); // node1's transport -> node2
+        transportService2.addUnresponsiveRule(node1); // node2's transport -> node1
+    }
+
+    @Override
+    public TimeValue expectedTimeToHeal() {
+        return TimeValue.timeValueSeconds(0); // zero: this scheme asks for no extra healing wait
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/NoOpDisruptionScheme.java b/src/test/java/org/elasticsearch/test/disruption/NoOpDisruptionScheme.java
new file mode 100644
index 00000000000..7b348b1afea
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/NoOpDisruptionScheme.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.test.InternalTestCluster;
+
+/** A {@link ServiceDisruptionScheme} in which every hook does nothing and the reported heal time is zero seconds. */
+public class NoOpDisruptionScheme implements ServiceDisruptionScheme {
+    @Override
+    public void applyToCluster(InternalTestCluster cluster) {
+        // no-op
+    }
+
+    @Override
+    public void removeFromCluster(InternalTestCluster cluster) {
+        // no-op
+    }
+
+    @Override
+    public void applyToNode(String node, InternalTestCluster cluster) {
+        // no-op
+    }
+
+    @Override
+    public void removeFromNode(String node, InternalTestCluster cluster) {
+        // no-op
+    }
+
+    @Override
+    public void startDisrupting() {
+        // no-op
+    }
+
+    @Override
+    public void stopDisrupting() {
+        // no-op
+    }
+
+    @Override
+    public void testClusterClosed() {
+        // no-op
+    }
+
+    @Override
+    public TimeValue expectedTimeToHeal() {
+        return TimeValue.timeValueSeconds(0);
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/ServiceDisruptionScheme.java b/src/test/java/org/elasticsearch/test/disruption/ServiceDisruptionScheme.java
new file mode 100644
index 00000000000..70774a82356
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/ServiceDisruptionScheme.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.test.InternalTestCluster;
+
+/** Hooks through which a test attaches a disruption to an {@link InternalTestCluster} and switches it on and off. */
+public interface ServiceDisruptionScheme {
+    // attach to / detach from a whole cluster
+    void applyToCluster(InternalTestCluster cluster);
+    void removeFromCluster(InternalTestCluster cluster);
+    // attach to / detach from the node with the given name
+    void applyToNode(String node, InternalTestCluster cluster);
+    void removeFromNode(String node, InternalTestCluster cluster);
+
+    // begin / end the actual disruption
+    void startDisrupting();
+    void stopDisrupting();
+
+    // NOTE(review): presumably called once the test cluster has been closed - confirm against InternalTestCluster
+    void testClusterClosed();
+
+    // NOTE(review): presumably the time the cluster needs to recover after the disruption stops - confirm with callers
+    TimeValue expectedTimeToHeal();
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/SingleNodeDisruption.java b/src/test/java/org/elasticsearch/test/disruption/SingleNodeDisruption.java
new file mode 100644
index 00000000000..3148254011e
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/SingleNodeDisruption.java
@@ -0,0 +1,83 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
+import org.elasticsearch.test.InternalTestCluster;
+
+import java.util.Random;
+
+/** Base for disruption schemes that act on a single node, either named up front or picked at random on apply. */
+public abstract class SingleNodeDisruption implements ServiceDisruptionScheme {
+
+    protected final ESLogger logger = Loggers.getLogger(getClass());
+
+    // disruptedNode stays null until a node is named or picked, and is reset to null on removal / cluster close
+    protected volatile String disruptedNode;
+    protected volatile InternalTestCluster cluster;
+    protected final Random random;
+
+    public SingleNodeDisruption(String disruptedNode, Random random) {
+        this(random);
+        this.disruptedNode = disruptedNode;
+    }
+
+    public SingleNodeDisruption(Random random) {
+        this.random = new Random(random.nextLong());
+    }
+
+    /** Records the cluster and, unless a node is already set, chooses one of the cluster's node names at random. */
+    @Override
+    public void applyToCluster(InternalTestCluster cluster) {
+        this.cluster = cluster;
+        if (disruptedNode != null) {
+            return;
+        }
+        final String[] candidates = cluster.getNodeNames();
+        disruptedNode = candidates[random.nextInt(candidates.length)];
+    }
+
+    @Override
+    public void removeFromCluster(InternalTestCluster cluster) {
+        if (disruptedNode == null) {
+            return;
+        }
+        removeFromNode(disruptedNode, cluster);
+    }
+
+    @Override
+    public synchronized void applyToNode(String node, InternalTestCluster cluster) {
+        // no-op
+    }
+
+    /** Stops the disruption and forgets the node, but only when {@code node} is the node being disrupted. */
+    @Override
+    public synchronized void removeFromNode(String node, InternalTestCluster cluster) {
+        if (disruptedNode != null && node.equals(disruptedNode)) {
+            stopDisrupting();
+            disruptedNode = null;
+        }
+    }
+
+    @Override
+    public synchronized void testClusterClosed() {
+        disruptedNode = null;
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/disruption/SlowClusterStateProcessing.java b/src/test/java/org/elasticsearch/test/disruption/SlowClusterStateProcessing.java
new file mode 100644
index 00000000000..46ae0afe54c
--- /dev/null
+++ b/src/test/java/org/elasticsearch/test/disruption/SlowClusterStateProcessing.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.test.disruption;
+
+import org.elasticsearch.cluster.ClusterService;
+import org.elasticsearch.cluster.ClusterState;
+import org.elasticsearch.cluster.ClusterStateNonMasterUpdateTask;
+import org.elasticsearch.common.Priority;
+import org.elasticsearch.common.unit.TimeValue;
+
+import java.util.Random;
+import java.util.concurrent.CountDownLatch;
+
+/** Disruption that repeatedly blocks the disrupted node's cluster state processing with a sleeping IMMEDIATE task. */
+public class SlowClusterStateProcessing extends SingleNodeDisruption {
+
+    volatile boolean disrupting;
+    volatile Thread worker;
+    // bounds used as milliseconds (see the join timeout below); each round draws a delay and a pause from [min, max)
+    final long intervalBetweenDelaysMin;
+    final long intervalBetweenDelaysMax;
+    final long delayDurationMin;
+    final long delayDurationMax;
+
+    public SlowClusterStateProcessing(Random random) {
+        this(null, random);
+    }
+
+    public SlowClusterStateProcessing(String disruptedNode, Random random) {
+        this(disruptedNode, random, 100, 200, 300, 20000);
+    }
+
+    public SlowClusterStateProcessing(String disruptedNode, Random random, long intervalBetweenDelaysMin,
+                                      long intervalBetweenDelaysMax, long delayDurationMin, long delayDurationMax) {
+        this(random, intervalBetweenDelaysMin, intervalBetweenDelaysMax, delayDurationMin, delayDurationMax);
+        this.disruptedNode = disruptedNode;
+    }
+
+    public SlowClusterStateProcessing(Random random, long intervalBetweenDelaysMin, long intervalBetweenDelaysMax,
+                                      long delayDurationMin, long delayDurationMax) {
+        super(random);
+        this.intervalBetweenDelaysMin = intervalBetweenDelaysMin;
+        this.intervalBetweenDelaysMax = intervalBetweenDelaysMax;
+        this.delayDurationMin = delayDurationMin;
+        this.delayDurationMax = delayDurationMax;
+    }
+
+    @Override
+    public void startDisrupting() {
+        disrupting = true;
+        worker = new Thread(new BackgroundWorker());
+        worker.setDaemon(true);
+        worker.start();
+    }
+
+    @Override
+    public void stopDisrupting() {
+        if (worker == null) {
+            return;
+        }
+        logger.info("stopping to slow down cluster state processing on [{}]", disruptedNode);
+        disrupting = false;
+        worker.interrupt();
+        try {
+            worker.join(2 * (intervalBetweenDelaysMax + delayDurationMax));
+        } catch (InterruptedException e) {
+            logger.info("background thread failed to stop");
+            Thread.currentThread().interrupt(); // do not swallow the caller's interrupt
+        }
+        worker = null;
+    }
+
+    /** Blocks the node's cluster state thread for {@code duration}; returns false when there is no node or service. */
+    private boolean interruptClusterStateProcessing(final TimeValue duration) throws InterruptedException {
+        final String disruptionNodeCopy = disruptedNode;
+        if (disruptionNodeCopy == null) {
+            return false;
+        }
+        logger.info("delaying cluster state updates on node [{}] for [{}]", disruptionNodeCopy, duration);
+        final CountDownLatch countDownLatch = new CountDownLatch(1);
+        ClusterService clusterService = cluster.getInstance(ClusterService.class, disruptionNodeCopy);
+        if (clusterService == null) {
+            return false;
+        }
+        clusterService.submitStateUpdateTask("service_disruption_delay", Priority.IMMEDIATE, new ClusterStateNonMasterUpdateTask() {
+            @Override
+            public ClusterState execute(ClusterState currentState) throws Exception {
+                Thread.sleep(duration.millis());
+                countDownLatch.countDown();
+                return currentState;
+            }
+            @Override
+            public void onFailure(String source, Throwable t) {
+                countDownLatch.countDown();
+            }
+        });
+        try {
+            countDownLatch.await();
+        } catch (InterruptedException e) {
+            // try to wait again, we really want the cluster state thread to be freed up when stopping disruption
+            countDownLatch.await();
+        }
+        return true;
+    }
+
+    @Override
+    public TimeValue expectedTimeToHeal() {
+        return TimeValue.timeValueMillis(0);
+    }
+
+    /** Draws from [min, max); returns min for an empty range, where Random#nextInt would throw on a bound of 0. */
+    private long randomBetween(long min, long max) {
+        return max > min ? min + random.nextInt((int) (max - min)) : min;
+    }
+
+    class BackgroundWorker implements Runnable {
+        @Override
+        public void run() {
+            while (disrupting && disruptedNode != null) {
+                try {
+                    TimeValue duration = new TimeValue(randomBetween(delayDurationMin, delayDurationMax));
+                    if (!interruptClusterStateProcessing(duration)) {
+                        continue;
+                    }
+                    duration = new TimeValue(randomBetween(intervalBetweenDelaysMin, intervalBetweenDelaysMax));
+                    if (disrupting && disruptedNode != null) {
+                        Thread.sleep(duration.millis());
+                    }
+                } catch (InterruptedException e) {
+                    // stopDisrupting() interrupts this thread to wake it up; the loop condition decides whether to exit
+                } catch (Exception e) {
+                    logger.error("error in background worker", e);
+                }
+            }
+        }
+    }
+}
diff --git a/src/test/java/org/elasticsearch/test/transport/MockTransportService.java b/src/test/java/org/elasticsearch/test/transport/MockTransportService.java
index 14f0296121e..cf088bab476 100644
--- a/src/test/java/org/elasticsearch/test/transport/MockTransportService.java
+++ b/src/test/java/org/elasticsearch/test/transport/MockTransportService.java
@@ -24,14 +24,21 @@ import org.elasticsearch.cluster.node.DiscoveryNode;
 import org.elasticsearch.common.component.Lifecycle;
 import org.elasticsearch.common.component.LifecycleListener;
 import org.elasticsearch.common.inject.Inject;
+import org.elasticsearch.common.io.stream.BytesStreamInput;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.network.NetworkService;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.transport.BoundTransportAddress;
 import org.elasticsearch.common.transport.TransportAddress;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.util.concurrent.AbstractRunnable;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.ConcurrentMap;
 
@@ -46,6 +53,7 @@ public class MockTransportService extends TransportService {
     public MockTransportService(Settings settings, Transport transport, ThreadPool threadPool) {
         super(settings, new LookupTestTransport(transport), threadPool);
         this.original = transport;
+
     }
 
     /**
@@ -92,12 +100,19 @@ public class MockTransportService extends TransportService {
         });
     }
 
+    /** Varargs convenience for the {@link Set}-based overload: matching operations throw ConnectTransportExceptions. */
+    public void addFailToSendNoConnectRule(DiscoveryNode node, final String... blockedActions) {
+        // wrap the action names in a set and hand them to the Set-based overload
+        final Set<String> actions = new HashSet<>(Arrays.asList(blockedActions));
+        addFailToSendNoConnectRule(node, actions);
+    }
+
     /**
      * Adds a rule that will cause matching operations to throw ConnectTransportExceptions
      */
     public void addFailToSendNoConnectRule(DiscoveryNode node, final Set<String> blockedActions) {
 
-        ((LookupTestTransport) transport).transports.put(node.getAddress(), new DelegateTransport(original) {
+        addDelegate(node, new DelegateTransport(original) {
             @Override
             public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
                 original.connectToNode(node);
@@ -124,7 +139,6 @@ public class MockTransportService extends TransportService {
      * and failing to connect once the rule was added.
      */
     public void addUnresponsiveRule(DiscoveryNode node) {
-        // TODO add a parameter to delay the connect timeout?
         addDelegate(node, new DelegateTransport(original) {
             @Override
             public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
@@ -143,8 +157,101 @@ public class MockTransportService extends TransportService {
         });
     }
 
+    /**
+     * Adds a rule that makes the given node look unresponsive for a limited time: until <code>duration</code>
+     * has elapsed (counted from the moment the rule is added) connects and send requests are delayed;
+     * afterwards every call goes straight to the original transport.
+     * <p>
+     * While the remaining delay is at least the default TCP connect timeout, a connect attempt blocks for
+     * that timeout and then fails with a {@link ConnectTransportException}; a shorter remaining delay is
+     * slept through before connecting for real. Send requests are copied and scheduled on the generic
+     * thread pool to go out once the remaining delay has passed; a failure of such a late send is only
+     * logged at debug level.
+     *
+     * @param node     the node towards which connecting and sending is delayed
+     * @param duration the amount of time to delay sending and connecting.
+     */
+    public void addUnresponsiveRule(DiscoveryNode node, final TimeValue duration) {
+        // the delay counts down from the moment the rule is installed
+        final long startTime = System.currentTimeMillis();
+
+        addDelegate(node, new DelegateTransport(original) {
+
+            /** time left until the rule expires; zero or negative once it has expired */
+            TimeValue getDelay() {
+                return new TimeValue(duration.millis() - (System.currentTimeMillis() - startTime));
+            }
+
+            @Override
+            public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
+                delayedConnect(node, false);
+            }
+
+            @Override
+            public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
+                delayedConnect(node, true);
+            }
+
+            /** shared implementation of both connect flavours; <code>light</code> selects connectToNodeLight */
+            private void delayedConnect(DiscoveryNode node, boolean light) throws ConnectTransportException {
+                TimeValue delay = getDelay();
+                if (delay.millis() > 0) {
+                    // TODO: Replace with proper setting
+                    TimeValue connectingTimeout = NetworkService.TcpSettings.TCP_DEFAULT_CONNECT_TIMEOUT;
+                    boolean timesOut = delay.millis() >= connectingTimeout.millis();
+                    try {
+                        Thread.sleep(timesOut ? connectingTimeout.millis() : delay.millis());
+                    } catch (InterruptedException e) {
+                        // keep the interrupt visible to the caller instead of silently clearing it
+                        Thread.currentThread().interrupt();
+                        throw new ConnectTransportException(node, "UNRESPONSIVE: interrupted while sleeping", e);
+                    }
+                    if (timesOut) {
+                        throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
+                    }
+                }
+                // the rule has expired, or the remaining delay has just been slept through: connect for real
+                if (light) {
+                    original.connectToNodeLight(node);
+                } else {
+                    original.connectToNode(node);
+                }
+            }
+
+            @Override
+            public void sendRequest(final DiscoveryNode node, final long requestId, final String action, TransportRequest request, final TransportRequestOptions options) throws IOException, TransportException {
+                // delayed sending - even if larger than the request timeout, to simulate a potential late response from target node
+                TimeValue delay = getDelay();
+                if (delay.millis() <= 0) {
+                    original.sendRequest(node, requestId, action, request, options);
+                    return;
+                }
+
+                // poor mans request cloning...
+                // NOTE(review): presumably needed because the caller may reuse the request after this method returns - confirm
+                TransportRequestHandler handler = MockTransportService.this.getHandler(action);
+                BytesStreamOutput bStream = new BytesStreamOutput();
+                request.writeTo(bStream);
+                final TransportRequest clonedRequest = handler.newInstance();
+                clonedRequest.readFrom(new BytesStreamInput(bStream.bytes()));
+
+                threadPool.schedule(delay, ThreadPool.Names.GENERIC, new AbstractRunnable() {
+                    @Override
+                    public void run() {
+                        try {
+                            original.sendRequest(node, requestId, action, clonedRequest, options);
+                        } catch (Throwable e) {
+                            logger.debug("failed to send delayed request", e);
+                        }
+                    }
+                });
+            }
+        });
+    }
+
     /**
      * Adds a new delegate transport that is used for communication with the given node.
+     *
      * @return <tt>true</tt> iff no other delegate was registered for this node before, otherwise <tt>false</tt>
      */
     public boolean addDelegate(DiscoveryNode node, DelegateTransport transport) {
@@ -209,12 +316,11 @@ public class MockTransportService extends TransportService {
 
         protected final Transport transport;
 
+
         public DelegateTransport(Transport transport) {
             this.transport = transport;
         }
 
-
-
         @Override
         public void transportServiceAdapter(TransportServiceAdapter service) {
             transport.transportServiceAdapter(service);