Always flush in FullClusterRestartIT#testRecovery (#47465)

The pattern in the latest failure is similar to the source of failure fixed
in #46956, but relates to synced-flush. If peer recovery happens after
indexing, and indexing flushes some shard at the end, then a synced flush in
the test will not roll or commit the translog.

Closes #46712
This commit is contained in:
Nhat Nguyen 2019-10-02 17:30:55 -04:00
parent 0beb5263b4
commit 44fdf2020a
1 changed files with 14 additions and 21 deletions

View File

@ -733,28 +733,21 @@ public class FullClusterRestartIT extends AbstractFullClusterRestartTestCase {
// make sure all recoveries are done // make sure all recoveries are done
ensureGreen(index); ensureGreen(index);
// Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
if (randomBoolean()) { // Force flush so we're sure that all translog are committed
// needs to call a replication action to sync the global checkpoint from primaries to replicas.
assertOK(client().performRequest(new Request("POST", "/" + index + "/_refresh")));
// We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
// A synced-flush request considers the global checkpoint sync as an ongoing operation because it acquires a shard permit.
assertBusy(() -> {
try {
Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
assertThat(result.get("successful"), equalTo(result.get("total")));
assertThat(result.get("failed"), equalTo(0));
} catch (ResponseException ex) {
throw new AssertionError(ex); // cause assert busy to retry
}
});
} else {
// Explicitly flush so we're sure to have a bunch of documents in the Lucene index
Request flushRequest = new Request("POST", "/" + index + "/_flush"); Request flushRequest = new Request("POST", "/" + index + "/_flush");
flushRequest.addParameter("force", "true"); flushRequest.addParameter("force", "true");
flushRequest.addParameter("wait_if_ongoing", "true"); flushRequest.addParameter("wait_if_ongoing", "true");
assertOK(client().performRequest(flushRequest)); assertOK(client().performRequest(flushRequest));
if (randomBoolean()) {
// We had a bug before where we failed to perform peer recovery with sync_id from 5.x to 6.x.
// We added this synced flush so we can exercise different paths of recovery code.
try {
client().performRequest(new Request("POST", index + "/_flush/synced"));
} catch (ResponseException ignored) {
// synced flush is optional here
}
} }
if (shouldHaveTranslog) { if (shouldHaveTranslog) {
// Update a few documents so we are sure to have a translog // Update a few documents so we are sure to have a translog