Improve stability of the testBatchingShardUpdateTask test

On slow machines, when this test randomly picks a large number of shards, it can occasionally take more than 32.5 seconds to snapshot all shards. This causes the test to miss the second-to-last assert in awaitsBusy at 32.5 seconds and then time out in BlockingClusterStateListener at 60 seconds. Because of the timeout, the pending task queue is cleaned before the last awaitsBusy assert at 65 seconds; as a result, the last assert runs on a completely empty queue and fails with a very confusing assertion error.

This commit makes the timeout in BlockingClusterStateListener occur after the last assert in assertBusyPendingTasks, which allows assertBusyPendingTasks to perform its last assert before the pending task queue is cleaned.

 This commit also reduces the maximum number of shards used in the test to 10 in order to speed up this test.
This commit is contained in:
Igor Motov 2016-01-07 19:33:50 -05:00
parent dcd8a8207f
commit 8fbb3686cd
2 changed files with 5 additions and 2 deletions

View File

@ -176,7 +176,10 @@ public abstract class AbstractSnapshotIntegTestCase extends ESIntegTestCase {
private long stopWaitingAt = -1; private long stopWaitingAt = -1;
public BlockingClusterStateListener(ClusterService clusterService, String blockOn, String countOn, Priority passThroughPriority) { public BlockingClusterStateListener(ClusterService clusterService, String blockOn, String countOn, Priority passThroughPriority) {
this(clusterService, blockOn, countOn, passThroughPriority, TimeValue.timeValueMinutes(1)); // Waiting for the 70 seconds here to make sure that the last check at 65 sec mark in assertBusyPendingTasks has a chance
// to finish before we timeout on the cluster state block. Otherwise the last check in assertBusyPendingTasks kicks in
// after the cluster state block cleanup takes place and its assert doesn't reflect the actual failure
this(clusterService, blockOn, countOn, passThroughPriority, TimeValue.timeValueSeconds(70));
} }
public BlockingClusterStateListener(ClusterService clusterService, final String blockOn, final String countOn, Priority passThroughPriority, TimeValue timeout) { public BlockingClusterStateListener(ClusterService clusterService, final String blockOn, final String countOn, Priority passThroughPriority, TimeValue timeout) {

View File

@ -1943,7 +1943,7 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
.put("compress", randomBoolean()) .put("compress", randomBoolean())
.put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES))); .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
assertAcked(prepareCreate("test-idx", 0, settingsBuilder().put("number_of_shards", between(1, 20)) assertAcked(prepareCreate("test-idx", 0, settingsBuilder().put("number_of_shards", between(1, 10))
.put("number_of_replicas", 0))); .put("number_of_replicas", 0)));
ensureGreen(); ensureGreen();