Improve CloseWhileRelocatingShardsIT (#37348)

This commit is contained in:
Tanguy Leroux 2019-01-14 13:14:36 +01:00 committed by GitHub
parent 6ca076bf74
commit 07dc8c7eee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 100 additions and 59 deletions

View File

@ -18,29 +18,40 @@
*/ */
package org.elasticsearch.indices.state; package org.elasticsearch.indices.state;
import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.admin.cluster.reroute.ClusterRerouteRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexRoutingTable;
import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRouting;
import org.elasticsearch.cluster.routing.allocation.command.AllocationCommands;
import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand; import org.elasticsearch.cluster.routing.allocation.command.MoveAllocationCommand;
import org.elasticsearch.cluster.routing.allocation.decider.ConcurrentRebalanceAllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider;
import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.Rebalance;
import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.rest.RestStatus; import org.elasticsearch.indices.recovery.PeerRecoverySourceService;
import org.elasticsearch.indices.recovery.StartRecoveryRequest;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.BackgroundIndexer; import org.elasticsearch.test.BackgroundIndexer;
import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.junit.annotations.TestLogging;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.TransportService;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.CountDownLatch; import java.util.concurrent.CountDownLatch;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import static org.elasticsearch.cluster.routing.allocation.decider.ConcurrentRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING; import static java.util.Collections.singletonList;
import static org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING;
import static org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING;
import static org.elasticsearch.indices.state.CloseIndexIT.assertException; import static org.elasticsearch.indices.state.CloseIndexIT.assertException;
import static org.elasticsearch.indices.state.CloseIndexIT.assertIndexIsClosed; import static org.elasticsearch.indices.state.CloseIndexIT.assertIndexIsClosed;
import static org.elasticsearch.indices.state.CloseIndexIT.assertIndexIsOpened; import static org.elasticsearch.indices.state.CloseIndexIT.assertIndexIsOpened;
@ -50,36 +61,52 @@ import static org.hamcrest.Matchers.greaterThan;
@ESIntegTestCase.ClusterScope(minNumDataNodes = 2) @ESIntegTestCase.ClusterScope(minNumDataNodes = 2)
public class CloseWhileRelocatingShardsIT extends ESIntegTestCase { public class CloseWhileRelocatingShardsIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return singletonList(MockTransportService.TestPlugin.class);
}
@Override @Override
protected Settings nodeSettings(int nodeOrdinal) { protected Settings nodeSettings(int nodeOrdinal) {
return Settings.builder() return Settings.builder()
.put(super.nodeSettings(nodeOrdinal)) .put(super.nodeSettings(nodeOrdinal))
.put(CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey(), 10) .put(ThrottlingAllocationDecider.CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey(), Integer.MAX_VALUE)
.put(CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(), -1) .put(ConcurrentRebalanceAllocationDecider.CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(), -1)
.build(); .build();
} }
@Override @Override
protected int numberOfReplicas() { protected int maximumNumberOfShards() {
return 1; return 3;
} }
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/37274") @TestLogging("org.elasticsearch.cluster.metadata.MetaDataIndexStateService:DEBUG,org.elasticsearch.action.admin.indices.close:DEBUG")
public void testCloseWhileRelocatingShards() throws Exception { public void testCloseWhileRelocatingShards() throws Exception {
final String[] indices = new String[randomIntBetween(1, 3)]; final String[] indices = new String[randomIntBetween(3, 5)];
final Map<String, Long> docsPerIndex = new HashMap<>(); final Map<String, Long> docsPerIndex = new HashMap<>();
final Map<String, BackgroundIndexer> indexers = new HashMap<>();
for (int i = 0; i < indices.length; i++) { for (int i = 0; i < indices.length; i++) {
final String indexName = "index-" + i; final String indexName = "index-" + i;
createIndex(indexName);
int nbDocs = 0; int nbDocs = 0;
if (randomBoolean()) { switch (i) {
nbDocs = randomIntBetween(1, 20); case 0:
for (int j = 0; j < nbDocs; j++) { logger.debug("creating empty index {}", indexName);
IndexResponse indexResponse = client().prepareIndex(indexName, "_doc").setSource("num", j).get(); createIndex(indexName);
assertEquals(RestStatus.CREATED, indexResponse.status()); break;
} case 1:
nbDocs = scaledRandomIntBetween(1, 100);
logger.debug("creating index {} with {} documents", indexName, nbDocs);
createIndex(indexName);
indexRandom(randomBoolean(), IntStream.range(0, nbDocs)
.mapToObj(n -> client().prepareIndex(indexName, "_doc").setSource("num", n))
.collect(Collectors.toList()));
break;
default:
logger.debug("creating index {} with background indexing", indexName);
final BackgroundIndexer indexer = new BackgroundIndexer(indexName, "_doc", client(), -1, 1);
indexers.put(indexName, indexer);
waitForDocs(1, indexer);
} }
docsPerIndex.put(indexName, (long) nbDocs); docsPerIndex.put(indexName, (long) nbDocs);
indices[i] = indexName; indices[i] = indexName;
@ -88,60 +115,72 @@ public class CloseWhileRelocatingShardsIT extends ESIntegTestCase {
ensureGreen(indices); ensureGreen(indices);
assertAcked(client().admin().cluster().prepareUpdateSettings() assertAcked(client().admin().cluster().prepareUpdateSettings()
.setTransientSettings(Settings.builder() .setTransientSettings(Settings.builder()
.put(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), EnableAllocationDecider.Rebalance.NONE.toString()))); .put(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey(), Rebalance.NONE.toString())));
// start some concurrent indexing threads final String targetNode = internalCluster().startDataOnlyNode();
final Map<String, BackgroundIndexer> indexers = new HashMap<>(); ensureClusterSizeConsistency(); // wait for the master to finish processing join.
for (final String index : indices) {
if (randomBoolean()) {
final BackgroundIndexer indexer = new BackgroundIndexer(index, "_doc", client(), -1, scaledRandomIntBetween(1, 3));
waitForDocs(1, indexer);
indexers.put(index, indexer);
}
}
final Set<String> acknowledgedCloses = ConcurrentCollections.newConcurrentSet(); final Set<String> acknowledgedCloses = ConcurrentCollections.newConcurrentSet();
final String newNode = internalCluster().startDataOnlyNode();
try { try {
final CountDownLatch latch = new CountDownLatch(1); final ClusterService clusterService = internalCluster().getInstance(ClusterService.class, internalCluster().getMasterName());
final List<Thread> threads = new ArrayList<>(); final CountDownLatch latch = new CountDownLatch(indices.length);
final CountDownLatch release = new CountDownLatch(1);
// start shards relocating threads // relocate one shard for every index to be closed
final ClusterState clusterState = client().admin().cluster().prepareState().get().getState(); final AllocationCommands commands = new AllocationCommands();
for (final String indexToRelocate : indices) { for (final String index : indices) {
final IndexRoutingTable indexRoutingTable = clusterState.routingTable().index(indexToRelocate); final NumShards numShards = getNumShards(index);
for (int i = 0; i < getNumShards(indexToRelocate).numPrimaries; i++) { final int shardId = numShards.numPrimaries == 1 ? 0 : randomIntBetween(0, numShards.numPrimaries - 1);
final int shardId = i; final IndexRoutingTable indexRoutingTable = clusterService.state().routingTable().index(index);
ShardRouting primary = indexRoutingTable.shard(shardId).primaryShard();
assertTrue(primary.started()); final ShardRouting primary = indexRoutingTable.shard(shardId).primaryShard();
ShardRouting replica = indexRoutingTable.shard(shardId).replicaShards().iterator().next(); assertTrue(primary.started());
String currentNodeId = primary.currentNodeId();
if (numShards.numReplicas > 0) {
final ShardRouting replica = indexRoutingTable.shard(shardId).replicaShards().iterator().next();
assertTrue(replica.started()); assertTrue(replica.started());
if (randomBoolean()) {
final String currentNodeId = randomBoolean() ? primary.currentNodeId() : replica.currentNodeId(); currentNodeId = replica.currentNodeId();
assertNotNull(currentNodeId); }
final Thread thread = new Thread(() -> {
try {
latch.await();
} catch (InterruptedException e) {
throw new AssertionError(e);
}
assertAcked(client().admin().cluster().prepareReroute()
.add(new MoveAllocationCommand(indexToRelocate, shardId, currentNodeId, newNode)));
});
threads.add(thread);
thread.start();
} }
final DiscoveryNode sourceNode = clusterService.state().nodes().resolveNode(primary.currentNodeId());
((MockTransportService) internalCluster().getInstance(TransportService.class, targetNode))
.addSendBehavior(internalCluster().getInstance(TransportService.class, sourceNode.getName()),
(connection, requestId, action, request, options) -> {
if (PeerRecoverySourceService.Actions.START_RECOVERY.equals(action)) {
logger.debug("blocking recovery of shard {}", ((StartRecoveryRequest) request).shardId());
latch.countDown();
try {
release.await();
logger.debug("releasing recovery of shard {}", ((StartRecoveryRequest) request).shardId());
} catch (InterruptedException e) {
throw new AssertionError(e);
}
}
connection.sendRequest(requestId, action, request, options);
}
);
commands.add(new MoveAllocationCommand(index, shardId, currentNodeId, targetNode));
} }
assertAcked(client().admin().cluster().reroute(new ClusterRerouteRequest().commands(commands)).get());
// start index closing threads // start index closing threads
final List<Thread> threads = new ArrayList<>();
for (final String indexToClose : indices) { for (final String indexToClose : indices) {
final Thread thread = new Thread(() -> { final Thread thread = new Thread(() -> {
try { try {
latch.await(); latch.await();
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new AssertionError(e); throw new AssertionError(e);
} finally {
release.countDown();
} }
// Closing is not always acknowledged when shards are relocating: this is the case when the target shard is initializing
// or is catching up operations. In these cases the TransportVerifyShardBeforeCloseAction will detect that the global
// and max sequence number don't match and will not ack the close.
AcknowledgedResponse closeResponse = client().admin().indices().prepareClose(indexToClose).get(); AcknowledgedResponse closeResponse = client().admin().indices().prepareClose(indexToClose).get();
if (closeResponse.isAcknowledged()) { if (closeResponse.isAcknowledged()) {
assertTrue("Index closing should not be acknowledged twice", acknowledgedCloses.add(indexToClose)); assertTrue("Index closing should not be acknowledged twice", acknowledgedCloses.add(indexToClose));
@ -155,6 +194,7 @@ public class CloseWhileRelocatingShardsIT extends ESIntegTestCase {
for (Thread thread : threads) { for (Thread thread : threads) {
thread.join(); thread.join();
} }
for (Map.Entry<String, BackgroundIndexer> entry : indexers.entrySet()) { for (Map.Entry<String, BackgroundIndexer> entry : indexers.entrySet()) {
final BackgroundIndexer indexer = entry.getValue(); final BackgroundIndexer indexer = entry.getValue();
indexer.setAssertNoFailuresOnStop(false); indexer.setAssertNoFailuresOnStop(false);
@ -172,7 +212,8 @@ public class CloseWhileRelocatingShardsIT extends ESIntegTestCase {
} }
} finally { } finally {
assertAcked(client().admin().cluster().prepareUpdateSettings() assertAcked(client().admin().cluster().prepareUpdateSettings()
.setTransientSettings(Settings.builder().putNull(CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey()))); .setTransientSettings(Settings.builder()
.putNull(EnableAllocationDecider.CLUSTER_ROUTING_REBALANCE_ENABLE_SETTING.getKey())));
} }
for (String index : indices) { for (String index : indices) {