enable testAckedIndexing

This commit is contained in:
Boaz Leskes 2016-02-12 10:06:25 +01:00
parent 15a9da4d84
commit 74194b8f0f
1 changed files with 97 additions and 76 deletions

View File

@ -20,7 +20,6 @@
package org.elasticsearch.discovery;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.util.LuceneTestCase;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
@ -147,7 +146,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
return startCluster(numberOfNodes, minimumMasterNode, null);
}
private List<String> startCluster(int numberOfNodes, int minimumMasterNode, @Nullable int[] unicastHostsOrdinals) throws ExecutionException, InterruptedException {
private List<String> startCluster(int numberOfNodes, int minimumMasterNode, @Nullable int[] unicastHostsOrdinals) throws
ExecutionException, InterruptedException {
configureUnicastCluster(numberOfNodes, unicastHostsOrdinals, minimumMasterNode);
List<String> nodes = internalCluster().startNodesAsync(numberOfNodes).get();
ensureStableCluster(numberOfNodes);
@ -176,7 +176,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
return pluginList(MockTransportService.TestPlugin.class);
}
private void configureUnicastCluster(int numberOfNodes, @Nullable int[] unicastHostsOrdinals, int minimumMasterNode) throws ExecutionException, InterruptedException {
private void configureUnicastCluster(int numberOfNodes, @Nullable int[] unicastHostsOrdinals, int minimumMasterNode) throws
ExecutionException, InterruptedException {
if (minimumMasterNode < 0) {
minimumMasterNode = numberOfNodes / 2 + 1;
}
@ -251,7 +252,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
logger.info("--> reducing min master nodes to 2");
assertAcked(client().admin().cluster().prepareUpdateSettings()
.setTransientSettings(Settings.builder().put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2)).get());
.setTransientSettings(Settings.builder().put(ElectMasterService.DISCOVERY_ZEN_MINIMUM_MASTER_NODES_SETTING.getKey(), 2))
.get());
String master = internalCluster().getMasterName();
String nonMaster = null;
@ -276,8 +278,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
// Makes sure that the get request can be executed on each node locally:
assertAcked(prepareCreate("test").setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
));
// Everything is stable now, it is now time to simulate evil...
@ -357,8 +359,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
));
ensureGreen();
@ -378,7 +380,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
networkPartition.stopDisrupting();
for (String node : nodes) {
ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkPartition.expectedTimeToHeal().millis()), true, node);
ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + networkPartition.expectedTimeToHeal().millis()),
true, node);
}
logger.info("issue a reroute");
@ -419,17 +422,16 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
* <p>
* This test is a superset of tests run in the Jepsen test suite, with the exception of versioned updates
*/
// NOTE: if you remove the awaitFix, make sure to port the test to the 1.x branch
@LuceneTestCase.AwaitsFix(bugUrl = "needs some more work to stabilize")
@TestLogging("_root:DEBUG,action.index:TRACE,action.get:TRACE,discovery:TRACE,cluster.service:TRACE,indices.recovery:TRACE,indices.cluster:TRACE")
@TestLogging("_root:DEBUG,action.index:TRACE,action.get:TRACE,discovery:TRACE,cluster.service:TRACE,"
+ "indices.recovery:TRACE,indices.cluster:TRACE")
public void testAckedIndexing() throws Exception {
// TODO: add node count randomizaion
final List<String> nodes = startCluster(3);
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1 + randomInt(2))
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, randomInt(2))
));
ensureGreen();
@ -453,36 +455,33 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
final Client client = client(node);
final String name = "indexer_" + indexers.size();
final int numPrimaries = getNumShards("test").numPrimaries;
Thread thread = new Thread(new Runnable() {
@Override
public void run() {
while (!stop.get()) {
String id = null;
try {
if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
continue;
}
logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
try {
id = Integer.toString(idGenerator.incrementAndGet());
int shard = MathUtils.mod(Murmur3HashFunction.hash(id), numPrimaries);
logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
IndexResponse response = client.prepareIndex("test", "type", id).setSource("{}").setTimeout("1s").get();
assertThat(response.getVersion(), equalTo(1L));
ackedDocs.put(id, node);
logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
} catch (ElasticsearchException e) {
exceptedExceptions.add(e);
logger.trace("[{}] failed id [{}] through node [{}]", e, name, id, node);
} finally {
countDownLatchRef.get().countDown();
logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
}
} catch (InterruptedException e) {
// fine - semaphore interrupt
} catch (Throwable t) {
logger.info("unexpected exception in background thread of [{}]", t, node);
Thread thread = new Thread(() -> {
while (!stop.get()) {
String id = null;
try {
if (!semaphore.tryAcquire(10, TimeUnit.SECONDS)) {
continue;
}
logger.info("[{}] Acquired semaphore and it has {} permits left", name, semaphore.availablePermits());
try {
id = Integer.toString(idGenerator.incrementAndGet());
int shard = MathUtils.mod(Murmur3HashFunction.hash(id), numPrimaries);
logger.trace("[{}] indexing id [{}] through node [{}] targeting shard [{}]", name, id, node, shard);
IndexResponse response = client.prepareIndex("test", "type", id).setSource("{}").setTimeout("1s").get("1s");
assertThat(response.getVersion(), equalTo(1L));
ackedDocs.put(id, node);
logger.trace("[{}] indexed id [{}] through node [{}]", name, id, node);
} catch (ElasticsearchException e) {
exceptedExceptions.add(e);
logger.trace("[{}] failed id [{}] through node [{}]", e, name, id, node);
} finally {
countDownLatchRef.get().countDown();
logger.trace("[{}] decreased counter : {}", name, countDownLatchRef.get().getCount());
}
} catch (InterruptedException e) {
// fine - semaphore interrupt
} catch (Throwable t) {
logger.info("unexpected exception in background thread of [{}]", t, node);
}
}
});
@ -512,11 +511,13 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
assertThat(semaphore.availablePermits(), equalTo(0));
semaphore.release(docsPerIndexer);
}
assertTrue(countDownLatchRef.get().await(60000 + disruptionScheme.expectedTimeToHeal().millis() * (docsPerIndexer * indexers.size()), TimeUnit.MILLISECONDS));
logger.info("waiting for indexing requests to complete");
assertTrue(countDownLatchRef.get().await(docsPerIndexer * 1000 + 2000, TimeUnit.MILLISECONDS));
logger.info("stopping disruption");
disruptionScheme.stopDisrupting();
ensureStableCluster(3, TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() + DISRUPTION_HEALING_OVERHEAD.millis()));
ensureStableCluster(3, TimeValue.timeValueMillis(disruptionScheme.expectedTimeToHeal().millis() +
DISRUPTION_HEALING_OVERHEAD.millis()));
ensureGreen("test");
logger.info("validating successful docs");
@ -581,7 +582,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
// restore GC
masterNodeDisruption.stopDisrupting();
ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + masterNodeDisruption.expectedTimeToHeal().millis()), false,
ensureStableCluster(3, new TimeValue(DISRUPTION_HEALING_OVERHEAD.millis() + masterNodeDisruption.expectedTimeToHeal().millis()),
false,
oldNonMasterNodes.get(0));
// make sure all nodes agree on master
@ -614,7 +616,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
majoritySide.remove(oldMasterNode);
// Keeps track of the previous and current master when a master node transition took place on each node on the majority side:
final Map<String, List<Tuple<String, String>>> masters = Collections.synchronizedMap(new HashMap<String, List<Tuple<String, String>>>());
final Map<String, List<Tuple<String, String>>> masters = Collections.synchronizedMap(new HashMap<String, List<Tuple<String,
String>>>());
for (final String node : majoritySide) {
masters.put(node, new ArrayList<Tuple<String, String>>());
internalCluster().getInstance(ClusterService.class, node).add(new ClusterStateListener() {
@ -623,7 +626,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
DiscoveryNode previousMaster = event.previousState().nodes().getMasterNode();
DiscoveryNode currentMaster = event.state().nodes().getMasterNode();
if (!Objects.equals(previousMaster, currentMaster)) {
logger.info("node {} received new cluster state: {} \n and had previous cluster state: {}", node, event.state(), event.previousState());
logger.info("node {} received new cluster state: {} \n and had previous cluster state: {}", node, event.state(),
event.previousState());
String previousMasterNodeName = previousMaster != null ? previousMaster.name() : null;
String currentMasterNodeName = currentMaster != null ? currentMaster.name() : null;
masters.get(node).add(new Tuple<>(previousMasterNodeName, currentMasterNodeName));
@ -655,7 +659,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
// but will be queued and once the old master node un-freezes it gets executed.
// The old master node will send this update + the cluster state where he is flagged as master to the other
// nodes that follow the new master. These nodes should ignore this update.
internalCluster().getInstance(ClusterService.class, oldMasterNode).submitStateUpdateTask("sneaky-update", new ClusterStateUpdateTask(Priority.IMMEDIATE) {
internalCluster().getInstance(ClusterService.class, oldMasterNode).submitStateUpdateTask("sneaky-update", new
ClusterStateUpdateTask(Priority.IMMEDIATE) {
@Override
public ClusterState execute(ClusterState currentState) throws Exception {
return ClusterState.builder(currentState).build();
@ -692,11 +697,16 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
for (Map.Entry<String, List<Tuple<String, String>>> entry : masters.entrySet()) {
String nodeName = entry.getKey();
List<Tuple<String, String>> recordedMasterTransition = entry.getValue();
assertThat("[" + nodeName + "] Each node should only record two master node transitions", recordedMasterTransition.size(), equalTo(2));
assertThat("[" + nodeName + "] First transition's previous master should be [null]", recordedMasterTransition.get(0).v1(), equalTo(oldMasterNode));
assertThat("[" + nodeName + "] First transition's current master should be [" + newMasterNode + "]", recordedMasterTransition.get(0).v2(), nullValue());
assertThat("[" + nodeName + "] Second transition's previous master should be [null]", recordedMasterTransition.get(1).v1(), nullValue());
assertThat("[" + nodeName + "] Second transition's current master should be [" + newMasterNode + "]", recordedMasterTransition.get(1).v2(), equalTo(newMasterNode));
assertThat("[" + nodeName + "] Each node should only record two master node transitions", recordedMasterTransition.size(),
equalTo(2));
assertThat("[" + nodeName + "] First transition's previous master should be [null]", recordedMasterTransition.get(0).v1(),
equalTo(oldMasterNode));
assertThat("[" + nodeName + "] First transition's current master should be [" + newMasterNode + "]", recordedMasterTransition
.get(0).v2(), nullValue());
assertThat("[" + nodeName + "] Second transition's previous master should be [null]", recordedMasterTransition.get(1).v1(),
nullValue());
assertThat("[" + nodeName + "] Second transition's current master should be [" + newMasterNode + "]",
recordedMasterTransition.get(1).v2(), equalTo(newMasterNode));
}
}
@ -709,8 +719,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
)
.get());
ensureGreen("test");
@ -726,7 +736,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
assertFalse(client(notIsolatedNode).admin().cluster().prepareHealth("test").setWaitForYellowStatus().get().isTimedOut());
IndexResponse indexResponse = internalCluster().client(notIsolatedNode).prepareIndex("test", "type").setSource("field", "value").get();
IndexResponse indexResponse = internalCluster().client(notIsolatedNode).prepareIndex("test", "type").setSource("field", "value")
.get();
assertThat(indexResponse.getVersion(), equalTo(1L));
logger.info("Verifying if document exists via node[" + notIsolatedNode + "]");
@ -844,17 +855,21 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
DiscoveryNodes discoveryNodes = internalCluster().getInstance(ClusterService.class, nonMasterNode).state().nodes();
TransportService masterTranspotService = internalCluster().getInstance(TransportService.class, discoveryNodes.masterNode().getName());
TransportService masterTranspotService = internalCluster().getInstance(TransportService.class, discoveryNodes.masterNode()
.getName());
logger.info("blocking requests from non master [{}] to master [{}]", nonMasterNode, masterNode);
MockTransportService nonMasterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, nonMasterNode);
MockTransportService nonMasterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class,
nonMasterNode);
nonMasterTransportService.addFailToSendNoConnectRule(masterTranspotService);
assertNoMaster(nonMasterNode);
logger.info("blocking cluster state publishing from master [{}] to non master [{}]", masterNode, nonMasterNode);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNode);
TransportService localTransportService = internalCluster().getInstance(TransportService.class, discoveryNodes.localNode().getName());
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class,
masterNode);
TransportService localTransportService = internalCluster().getInstance(TransportService.class, discoveryNodes.localNode().getName
());
if (randomBoolean()) {
masterTransportService.addFailToSendNoConnectRule(localTransportService, PublishClusterStateAction.SEND_ACTION_NAME);
} else {
@ -863,9 +878,11 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
logger.info("allowing requests from non master [{}] to master [{}], waiting for two join request", nonMasterNode, masterNode);
final CountDownLatch countDownLatch = new CountDownLatch(2);
nonMasterTransportService.addDelegate(masterTranspotService, new MockTransportService.DelegateTransport(nonMasterTransportService.original()) {
nonMasterTransportService.addDelegate(masterTranspotService, new MockTransportService.DelegateTransport(nonMasterTransportService
.original()) {
@Override
public void sendRequest(DiscoveryNode node, long requestId, String action, TransportRequest request, TransportRequestOptions options) throws IOException, TransportException {
public void sendRequest(DiscoveryNode node, long requestId, String action, TransportRequest request, TransportRequestOptions
options) throws IOException, TransportException {
if (action.equals(MembershipAction.DISCOVERY_JOIN_ACTION_NAME)) {
countDownLatch.countDown();
}
@ -893,16 +910,16 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
List<String> nonMasterNodes = nodes.stream().filter(node -> !node.equals(masterNode)).collect(Collectors.toList());
String nonMasterNode = randomFrom(nonMasterNodes);
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
));
.setSettings(Settings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 3)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 2)
));
ensureGreen();
String nonMasterNodeId = internalCluster().clusterService(nonMasterNode).localNode().getId();
// fail a random shard
ShardRouting failedShard =
randomFrom(clusterService().state().getRoutingNodes().node(nonMasterNodeId).shardsWithState(ShardRoutingState.STARTED));
randomFrom(clusterService().state().getRoutingNodes().node(nonMasterNodeId).shardsWithState(ShardRoutingState.STARTED));
ShardStateAction service = internalCluster().getInstance(ShardStateAction.class, nonMasterNode);
CountDownLatch latch = new CountDownLatch(1);
AtomicBoolean success = new AtomicBoolean();
@ -911,7 +928,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
NetworkPartition networkPartition = addRandomIsolation(isolatedNode);
networkPartition.startDisrupting();
service.shardFailed(failedShard, failedShard, "simulated", new CorruptIndexException("simulated", (String) null), new ShardStateAction.Listener() {
service.shardFailed(failedShard, failedShard, "simulated", new CorruptIndexException("simulated", (String) null), new
ShardStateAction.Listener() {
@Override
public void onSuccess() {
success.set(true);
@ -988,7 +1006,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
}
logger.info("blocking request from master [{}] to [{}]", masterNode, nonMasterNode);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class, masterNode);
MockTransportService masterTransportService = (MockTransportService) internalCluster().getInstance(TransportService.class,
masterNode);
if (randomBoolean()) {
masterTransportService.addUnresponsiveRule(internalCluster().getInstance(TransportService.class, nonMasterNode));
} else {
@ -1020,9 +1039,9 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
final String masterNode = masterNodeFuture.get();
logger.info("--> creating index [test] with one shard and on replica");
assertAcked(prepareCreate("test").setSettings(
Settings.builder().put(indexSettings())
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0))
Settings.builder().put(indexSettings())
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0))
);
ensureGreen("test");
@ -1039,7 +1058,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
MockTransportService transportServiceNode2 = (MockTransportService) internalCluster().getInstance(TransportService.class, node_2);
CountDownLatch beginRelocationLatch = new CountDownLatch(1);
CountDownLatch endRelocationLatch = new CountDownLatch(1);
transportServiceNode2.addTracer(new IndicesStoreIntegrationIT.ReclocationStartEndTracer(logger, beginRelocationLatch, endRelocationLatch));
transportServiceNode2.addTracer(new IndicesStoreIntegrationIT.ReclocationStartEndTracer(logger, beginRelocationLatch,
endRelocationLatch));
internalCluster().client().admin().cluster().prepareReroute().add(new MoveAllocationCommand("test", 0, node_1, node_2)).get();
// wait for relocation to start
beginRelocationLatch.await();
@ -1076,7 +1096,7 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/11665")
public void testIndicesDeleted() throws Exception {
configureUnicastCluster(3, null, 2);
InternalTestCluster.Async<List<String>> masterNodes= internalCluster().startMasterOnlyNodesAsync(2);
InternalTestCluster.Async<List<String>> masterNodes = internalCluster().startMasterOnlyNodesAsync(2);
InternalTestCluster.Async<String> dataNode = internalCluster().startDataOnlyNodeAsync();
dataNode.get();
masterNodes.get();
@ -1158,7 +1178,8 @@ public class DiscoveryWithServiceDisruptionsIT extends ESIntegTestCase {
assertNull("node [" + node + "] still has [" + state.nodes().masterNode() + "] as master", state.nodes().masterNode());
if (expectedBlocks != null) {
for (ClusterBlockLevel level : expectedBlocks.levels()) {
assertTrue("node [" + node + "] does have level [" + level + "] in it's blocks", state.getBlocks().hasGlobalBlock(level));
assertTrue("node [" + node + "] does have level [" + level + "] in it's blocks", state.getBlocks().hasGlobalBlock
(level));
}
}
}