Add information about in-flight requests when checking IndexShard operation counter (#21308)

After each test, our test infrastructure checks that there are no more in-flight requests at the shard level. Whenever the check fails, we only know that there were in-flight requests, but not which requests caused the failure. This commit adds the replication tasks that are still active at that moment to the assertion error message.
This commit is contained in:
Yannick Welsch 2016-11-03 18:36:07 +01:00 committed by GitHub
parent 00e7026778
commit 39f4229594
4 changed files with 29 additions and 6 deletions

View File

@ -173,7 +173,7 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
}
@Override
protected void beforeIndexDeletion() {
protected void beforeIndexDeletion() throws IOException {
if (disableBeforeIndexDeletion == false) {
super.beforeIndexDeletion();
}

View File

@ -575,7 +575,7 @@ public abstract class ESIntegTestCase extends ESTestCase {
return Collections.emptySet();
}
protected void beforeIndexDeletion() {
protected void beforeIndexDeletion() throws IOException {
cluster().beforeIndexDeletion();
}

View File

@ -31,8 +31,10 @@ import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.node.stats.NodeStats;
import org.elasticsearch.action.admin.cluster.node.tasks.list.ListTasksResponse;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags;
import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags.Flag;
import org.elasticsearch.action.support.replication.ReplicationTask;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.cluster.ClusterName;
@ -63,6 +65,8 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.PageCacheRecycler;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.discovery.DiscoverySettings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.NodeEnvironment;
@ -87,6 +91,8 @@ import org.elasticsearch.node.service.NodeService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.tasks.TaskInfo;
import org.elasticsearch.tasks.TaskManager;
import org.elasticsearch.test.disruption.ServiceDisruptionScheme;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.MockTransportClient;
@ -1013,7 +1019,7 @@ public final class InternalTestCluster extends TestCluster {
}
@Override
public void beforeIndexDeletion() {
public void beforeIndexDeletion() throws IOException {
// Check that the operations counter on index shard has reached 0.
// The assumption here is that after a test there are no ongoing write operations.
// tests that have ongoing write operations after the test (for example because ttl is used
@ -1048,13 +1054,30 @@ public final class InternalTestCluster extends TestCluster {
}
}
private void assertShardIndexCounter() {
private void assertShardIndexCounter() throws IOException {
final Collection<NodeAndClient> nodesAndClients = nodes.values();
for (NodeAndClient nodeAndClient : nodesAndClients) {
IndicesService indexServices = getInstance(IndicesService.class, nodeAndClient.name);
for (IndexService indexService : indexServices) {
for (IndexShard indexShard : indexService) {
assertThat("index shard counter on shard " + indexShard.shardId() + " on node " + nodeAndClient.name + " not 0", indexShard.getActiveOperationsCount(), equalTo(0));
int activeOperationsCount = indexShard.getActiveOperationsCount();
if (activeOperationsCount > 0) {
TaskManager taskManager = getInstance(TransportService.class, nodeAndClient.name).getTaskManager();
DiscoveryNode localNode = getInstance(ClusterService.class, nodeAndClient.name).localNode();
List<TaskInfo> taskInfos = taskManager.getTasks().values().stream()
.filter(task -> task instanceof ReplicationTask)
.map(task -> task.taskInfo(localNode, true))
.collect(Collectors.toList());
ListTasksResponse response = new ListTasksResponse(taskInfos, Collections.emptyList(), Collections.emptyList());
XContentBuilder builder = XContentFactory.jsonBuilder()
.prettyPrint()
.startObject()
.value(response)
.endObject();
throw new AssertionError("expected index shard counter on shard " + indexShard.shardId() + " on node " +
nodeAndClient.name + " to be 0 but was " + activeOperationsCount + ". Current replication tasks on node:\n" +
builder.string());
}
}
}
}

View File

@ -82,7 +82,7 @@ public abstract class TestCluster implements Closeable {
/**
* Assertions that should run before the cluster is wiped should be called in this method
*/
public void beforeIndexDeletion() {
public void beforeIndexDeletion() throws IOException {
}
/**