Always flush in FullClusterRestartIT#testRecovery (#47465)

The pattern in the latest failure is similar to the one fixed in #46956 but relates to synced-flush. If peer recovery happens after indexing, and indexing flushes some shard at the end, then a synced flush in the test will not roll or commit the translog. Closes #46712
2019-10-02 17:30:55 -04:00 · 2019-10-02 17:30:55 -04:00 · 44fdf2020a
parent 0beb5263b4
commit 44fdf2020a
1 changed files with 14 additions and 21 deletions
--- a/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
+++ b/qa/full-cluster-restart/src/test/java/org/elasticsearch/upgrades/FullClusterRestartIT.java
@@ -733,28 +733,21 @@ public class FullClusterRestartIT extends AbstractFullClusterRestartTestCase {
            // make sure all recoveries are done
            ensureGreen(index);
-            // Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
+
-            if (randomBoolean()) {
+            // Force flush so we're sure that all translog are committed
                // needs to call a replication action to sync the global checkpoint from primaries to replication.
                assertOK(client().performRequest(new Request("POST", "/" + index + "/_refresh")));
                // We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
                // A synced-flush request considers the global checkpoint sync as an going operation because it acquires a shard permit.
                assertBusy(() -> {
                    try {
                        Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
                        Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
                        assertThat(result.get("successful"), equalTo(result.get("total")));
                        assertThat(result.get("failed"), equalTo(0));
                    } catch (ResponseException ex) {
                        throw new AssertionError(ex); // cause assert busy to retry
                    }
                });
            } else {
                // Explicitly flush so we're sure to have a bunch of documents in the Lucene index
            Request flushRequest = new Request("POST", "/" + index + "/_flush");
            flushRequest.addParameter("force", "true");
            flushRequest.addParameter("wait_if_ongoing", "true");
            assertOK(client().performRequest(flushRequest));
            if (randomBoolean()) {
                // We had a bug before where we failed to perform peer recovery with sync_id from 5.x to 6.x.
                // We added this synced flush so we can exercise different paths of recovery code.
                try {
                    client().performRequest(new Request("POST", index + "/_flush/synced"));
                } catch (ResponseException ignored) {
                    // synced flush is optional here
                }
            }
            if (shouldHaveTranslog) {
                // Update a few documents so we are sure to have a translog