Always flush in FullClusterRestartIT#testRecovery (#47465)
The pattern in the latest failure is similar to the source fixed in #46956 but relates to synced-flush. If peer recovery happens after indexing, and indexing flushes some shard at the end, then a synced flush in the test will not roll or commit translog. Closes #46712
This commit is contained in:
parent
0beb5263b4
commit
44fdf2020a
|
@ -733,28 +733,21 @@ public class FullClusterRestartIT extends AbstractFullClusterRestartTestCase {
|
||||||
|
|
||||||
// make sure all recoveries are done
|
// make sure all recoveries are done
|
||||||
ensureGreen(index);
|
ensureGreen(index);
|
||||||
// Recovering a synced-flush index from 5.x to 6.x might be subtle as a 5.x index commit does not have all 6.x commit tags.
|
|
||||||
if (randomBoolean()) {
|
// Force flush so we're sure that all translog are committed
|
||||||
// needs to call a replication action to sync the global checkpoint from primaries to replication.
|
|
||||||
assertOK(client().performRequest(new Request("POST", "/" + index + "/_refresh")));
|
|
||||||
// We have to spin synced-flush requests here because we fire the global checkpoint sync for the last write operation.
|
|
||||||
// A synced-flush request considers the global checkpoint sync as an going operation because it acquires a shard permit.
|
|
||||||
assertBusy(() -> {
|
|
||||||
try {
|
|
||||||
Response resp = client().performRequest(new Request("POST", index + "/_flush/synced"));
|
|
||||||
Map<String, Object> result = ObjectPath.createFromResponse(resp).evaluate("_shards");
|
|
||||||
assertThat(result.get("successful"), equalTo(result.get("total")));
|
|
||||||
assertThat(result.get("failed"), equalTo(0));
|
|
||||||
} catch (ResponseException ex) {
|
|
||||||
throw new AssertionError(ex); // cause assert busy to retry
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
// Explicitly flush so we're sure to have a bunch of documents in the Lucene index
|
|
||||||
Request flushRequest = new Request("POST", "/" + index + "/_flush");
|
Request flushRequest = new Request("POST", "/" + index + "/_flush");
|
||||||
flushRequest.addParameter("force", "true");
|
flushRequest.addParameter("force", "true");
|
||||||
flushRequest.addParameter("wait_if_ongoing", "true");
|
flushRequest.addParameter("wait_if_ongoing", "true");
|
||||||
assertOK(client().performRequest(flushRequest));
|
assertOK(client().performRequest(flushRequest));
|
||||||
|
|
||||||
|
if (randomBoolean()) {
|
||||||
|
// We had a bug before where we failed to perform peer recovery with sync_id from 5.x to 6.x.
|
||||||
|
// We added this synced flush so we can exercise different paths of recovery code.
|
||||||
|
try {
|
||||||
|
client().performRequest(new Request("POST", index + "/_flush/synced"));
|
||||||
|
} catch (ResponseException ignored) {
|
||||||
|
// synced flush is optional here
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (shouldHaveTranslog) {
|
if (shouldHaveTranslog) {
|
||||||
// Update a few documents so we are sure to have a translog
|
// Update a few documents so we are sure to have a translog
|
||||||
|
|
Loading…
Reference in New Issue