diff --git a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java index 57c6ad7ff86..081a1918674 100644 --- a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java +++ b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java @@ -24,7 +24,9 @@ import org.apache.http.entity.ContentType; import org.apache.http.entity.StringEntity; import org.apache.http.util.EntityUtils; import org.elasticsearch.Version; +import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseException; import org.elasticsearch.client.RestClient; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Booleans; @@ -701,8 +703,24 @@ public class FullClusterRestartIT extends ESRestTestCase { // make sure all recoveries are done ensureGreen(index); - // Explicitly flush so we're sure to have a bunch of documents in the Lucene index - client().performRequest("POST", "/_flush"); + // Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags. + if (randomBoolean()) { + // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation. + // A synced-flush request considers the global checkpoint sync as an going operation because it acquires a shard permit. + assertBusy(() -> { + try { + Response resp = client().performRequest(new Request("POST", index + "/_flush/synced")); + Map result = ObjectPath.createFromResponse(resp).evaluate("_shards"); + assertThat(result.get("successful"), equalTo(result.get("total"))); + assertThat(result.get("failed"), equalTo(0)); + } catch (ResponseException ex) { + throw new AssertionError(ex); // cause assert busy to retry + } + }); + } else { + // Explicitly flush so we're sure to have a bunch of documents in the Lucene index + assertOK(client().performRequest(new Request("POST", "/_flush"))); + } if (shouldHaveTranslog) { // Update a few documents so we are sure to have a translog indexRandomDocuments(count / 10, false /* Flushing here would invalidate the whole thing....*/, false, diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java index 1351de16cf7..809cd40d698 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java @@ -22,7 +22,9 @@ import org.apache.http.entity.ContentType; import org.apache.http.entity.StringEntity; import org.elasticsearch.Version; import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.AbstractRunnable; @@ -239,4 +241,34 @@ public class RecoveryIT extends AbstractRollingTestCase { } } + public void testRecoverSyncedFlushIndex() throws Exception { + final String index = "recover_synced_flush_index"; + if (CLUSTER_TYPE == ClusterType.OLD) { + Settings.Builder settings = Settings.builder() + .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) + .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1) + // if the node with the replica is the first to be restarted, while a replica is still recovering + // then delayed allocation will kick in. When the node comes back, the master will search for a copy + // but the recovering copy will be seen as invalid and the cluster health won't return to GREEN + // before timing out + .put(INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "100ms") + .put(SETTING_ALLOCATION_MAX_RETRY.getKey(), "0"); // fail faster + createIndex(index, settings.build()); + indexDocs(index, 0, randomInt(5)); + // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation. + // A synced-flush request considers the global checkpoint sync as an going operation because it acquires a shard permit. + assertBusy(() -> { + try { + Response resp = client().performRequest(new Request("POST", index + "/_flush/synced")); + Map result = ObjectPath.createFromResponse(resp).evaluate("_shards"); + assertThat(result.get("successful"), equalTo(result.get("total"))); + assertThat(result.get("failed"), equalTo(0)); + } catch (ResponseException ex) { + throw new AssertionError(ex); // cause assert busy to retry + } + }); + } + ensureGreen(index); + } + }