improve retry when closing the node performing an operation
commit a2ceaa91cc
parent 300f38ee3c
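In short: when the node that is executing a master-node or shard-replication operation is shutting down, pending cluster-state listeners now fail with the new NodeCloseException (carrying the closing DiscoveryNode) instead of a generic ElasticSearchIllegalStateException, and the shard replication action treats that exception like a lost connection, retrying the primary operation against the new cluster state rather than failing the request. Below is a minimal, self-contained sketch of that retry decision, not the actual Elasticsearch code: the three exception classes are simplified stand-ins for the real ones touched by this diff, and retryPrimary()/failRequest() are hypothetical placeholders for the action's real retry and failure paths.

// Minimal, self-contained sketch (not the actual Elasticsearch classes) of the retry
// decision this commit introduces: a RemoteTransportException whose cause is a
// NodeCloseException is treated like a disconnect and retried instead of failed.
// NodeCloseException, ConnectTransportException and RemoteTransportException are
// simplified stand-ins; retryPrimary() and failRequest() are hypothetical placeholders.
public class RetryOnNodeCloseSketch {

    static class NodeCloseException extends RuntimeException {
        NodeCloseException(String node) {
            super("node closed " + node);
        }
    }

    static class ConnectTransportException extends RuntimeException {
    }

    static class RemoteTransportException extends RuntimeException {
        RemoteTransportException(Throwable cause) {
            super(cause);
        }

        Throwable unwrapCause() {
            return getCause(); // simplified version of the real unwrapping logic
        }
    }

    // Mirrors the widened condition in TransportShardReplicationOperationAction#handleException.
    static void handleException(RemoteTransportException exp) {
        Throwable cause = exp.unwrapCause();
        if (cause instanceof ConnectTransportException || cause instanceof NodeCloseException) {
            retryPrimary();
        } else {
            failRequest(exp);
        }
    }

    static void retryPrimary() {
        System.out.println("node closed or disconnected -> retrying the primary operation");
    }

    static void failRequest(Throwable t) {
        System.out.println("failing the request: " + t);
    }

    public static void main(String[] args) {
        handleException(new RemoteTransportException(new NodeCloseException("node1")));   // retried
        handleException(new RemoteTransportException(new IllegalStateException("boom"))); // failed
    }
}

The other half of the change guards InternalClusterService.add(): a listener registered after the service has stopped gets an immediate onClose() callback, so callers reach the NodeCloseException path above instead of waiting on a timeout.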
@@ -31,6 +31,7 @@ import org.elasticsearch.cluster.TimeoutClusterStateListener;
 import org.elasticsearch.cluster.node.DiscoveryNodes;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.node.NodeCloseException;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
 
@@ -78,7 +79,7 @@ public abstract class TransportMasterNodeOperationAction<Request extends MasterN
 
     private void innerExecute(final Request request, final ActionListener<Response> listener, final boolean retrying) {
         final ClusterState clusterState = clusterService.state();
-        DiscoveryNodes nodes = clusterState.nodes();
+        final DiscoveryNodes nodes = clusterState.nodes();
         if (nodes.localNodeMaster()) {
             threadPool.execute(new Runnable() {
                 @Override public void run() {
@@ -123,7 +124,7 @@ public abstract class TransportMasterNodeOperationAction<Request extends MasterN
 
                 @Override public void onClose() {
                     clusterService.remove(this);
-                    listener.onFailure(new ElasticSearchIllegalStateException("node is shutting down"));
+                    listener.onFailure(new NodeCloseException(nodes.localNode()));
                 }
 
                 @Override public void onTimeout(TimeValue timeout) {
@@ -20,7 +20,6 @@
 package org.elasticsearch.action.support.replication;
 
 import org.elasticsearch.ElasticSearchException;
-import org.elasticsearch.ElasticSearchIllegalStateException;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.action.ActionListener;
 import org.elasticsearch.action.ActionResponse;
@@ -48,6 +47,7 @@ import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.index.shard.service.IndexShard;
 import org.elasticsearch.indices.IndexMissingException;
 import org.elasticsearch.indices.IndicesService;
+import org.elasticsearch.node.NodeCloseException;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.*;
 
@@ -296,7 +296,7 @@ public abstract class TransportShardReplicationOperationAction<Request extends S
 
                 @Override public void handleException(RemoteTransportException exp) {
                     // if we got disconnected from the node, retry it...
-                    if (exp.unwrapCause() instanceof ConnectTransportException) {
+                    if (exp.unwrapCause() instanceof ConnectTransportException || exp.unwrapCause() instanceof NodeCloseException) {
                         primaryOperationStarted.set(false);
                         retryPrimary(fromClusterEvent, shard.shardId());
                     } else {
@@ -342,7 +342,7 @@ public abstract class TransportShardReplicationOperationAction<Request extends S
 
                 @Override public void onClose() {
                     clusterService.remove(this);
-                    listener.onFailure(new ElasticSearchIllegalStateException("node is shutting down"));
+                    listener.onFailure(new NodeCloseException(nodes.localNode()));
                 }
 
                 @Override public void clusterChanged(ClusterChangedEvent event) {
@@ -118,6 +118,10 @@ public class InternalClusterService extends AbstractLifecycleComponent<ClusterSe
     }
 
     public void add(TimeValue timeout, final TimeoutClusterStateListener listener) {
+        if (lifecycle.stoppedOrClosed()) {
+            listener.onClose();
+            return;
+        }
         NotifyTimeout notifyTimeout = new NotifyTimeout(listener, timeout);
         Timeout timerTimeout = timerService.newTimeout(notifyTimeout, timeout, TimerService.ExecutionType.THREADED);
         onGoingTimeouts.add(new Tuple<Timeout, NotifyTimeout>(timerTimeout, notifyTimeout));
@@ -0,0 +1,40 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.node;
+
+import org.elasticsearch.ElasticSearchException;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+
+/**
+ * @author kimchy (shay.banon)
+ */
+public class NodeCloseException extends ElasticSearchException {
+
+    private final DiscoveryNode node;
+
+    public NodeCloseException(DiscoveryNode node) {
+        super("node closed " + node);
+        this.node = node;
+    }
+
+    public DiscoveryNode node() {
+        return node;
+    }
+}
@@ -21,6 +21,8 @@ package org.elasticsearch.test.integration.recovery;
 
 import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
 import org.elasticsearch.common.collect.MapBuilder;
+import org.elasticsearch.common.logging.ESLogger;
+import org.elasticsearch.common.logging.Loggers;
 import org.elasticsearch.test.integration.AbstractNodesTests;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.Test;
@@ -38,147 +40,37 @@ import static org.hamcrest.Matchers.*;
  */
 public class RecoveryWhileUnderLoadTests extends AbstractNodesTests {
 
+    private final ESLogger logger = Loggers.getLogger(RecoveryWhileUnderLoadTests.class);
+
     @AfterMethod public void shutdownNodes() {
         closeAllNodes();
     }
 
     @Test public void recoverWhileUnderLoadAllocateBackupsTest() throws Exception {
+        logger.info("--> starting [node1] ...");
         startNode("node1");
 
+        logger.info("--> creating test index ...");
         client("node1").admin().indices().prepareCreate("test").execute().actionGet();
 
         final AtomicLong idGenerator = new AtomicLong();
         final AtomicBoolean stop = new AtomicBoolean(false);
         Thread[] writers = new Thread[5];
         final CountDownLatch stopLatch = new CountDownLatch(writers.length);
 
+        logger.info("--> starting {} indexing threads", writers.length);
         for (int i = 0; i < writers.length; i++) {
-            writers[i] = new Thread() {
-                @Override public void run() {
-                    while (!stop.get()) {
-                        long id = idGenerator.incrementAndGet();
-                        client("node1").prepareIndex("test", "type1", Long.toString(id))
-                                .setSource(MapBuilder.<String, Object>newMapBuilder().put("test", "value" + id).map()).execute().actionGet();
-                    }
-                    stopLatch.countDown();
-                }
-            };
-            writers[i].start();
-        }
-
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 20000) {
-            Thread.sleep(100);
-            client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        }
-
-        // now flush, just to make sure we have some data in the index, not just translog
-        client("node1").admin().indices().prepareFlush().execute().actionGet();
-
-
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 40000) {
-            Thread.sleep(100);
-            client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        }
-
-        // now start another node, while we index
-        startNode("server2");
-
-        // make sure the cluster state is green, and all has been recovered
-        assertThat(client("node1").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
-
-
-        // wait till we index 10,0000
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 100000) {
-            Thread.sleep(100);
-            client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        }
-
-        stop.set(true);
-        stopLatch.await();
-
-        client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        for (int i = 0; i < 10; i++) {
-            assertThat(client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(), equalTo(idGenerator.get()));
-        }
-    }
-
-    @Test public void recoverWhileUnderLoadAllocateBackupsRelocatePrimariesTest() throws Exception {
-        startNode("node1");
-
-        client("node1").admin().indices().prepareCreate("test").execute().actionGet();
-
-        final AtomicLong idGenerator = new AtomicLong();
-        final AtomicBoolean stop = new AtomicBoolean(false);
-        Thread[] writers = new Thread[5];
-        final CountDownLatch stopLatch = new CountDownLatch(writers.length);
-        for (int i = 0; i < writers.length; i++) {
-            writers[i] = new Thread() {
-                @Override public void run() {
-                    while (!stop.get()) {
-                        long id = idGenerator.incrementAndGet();
-                        client("node1").prepareIndex("test", "type1", Long.toString(id))
-                                .setSource(MapBuilder.<String, Object>newMapBuilder().put("test", "value" + id).map()).execute().actionGet();
-                    }
-                    stopLatch.countDown();
-                }
-            };
-            writers[i].start();
-        }
-
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 20000) {
-            Thread.sleep(100);
-            client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        }
-
-        // now flush, just to make sure we have some data in the index, not just translog
-        client("node1").admin().indices().prepareFlush().execute().actionGet();
-
-
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 40000) {
-            Thread.sleep(100);
-            client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        }
-
-        // now start another node, while we index
-        startNode("node2");
-        startNode("node3");
-        startNode("node4");
-
-        assertThat(client("node1").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
-
-
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 150000) {
-            Thread.sleep(100);
-            client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        }
-
-        stop.set(true);
-        stopLatch.await();
-
-        client("node1").admin().indices().prepareRefresh().execute().actionGet();
-        for (int i = 0; i < 10; i++) {
-            assertThat(client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(), equalTo(idGenerator.get()));
-        }
-    }
-
-    @Test public void recoverWhileUnderLoadWithNodeShutdown() throws Exception {
-        startNode("node1");
-        startNode("node2");
-
-        client("node1").admin().indices().prepareCreate("test").execute().actionGet();
-
-        final AtomicLong idGenerator = new AtomicLong();
-        final AtomicBoolean stop = new AtomicBoolean(false);
-        Thread[] writers = new Thread[5];
-        final CountDownLatch stopLatch = new CountDownLatch(writers.length);
-        for (int i = 0; i < writers.length; i++) {
+            final int indexerId = i;
             writers[i] = new Thread() {
                 @Override public void run() {
                     try {
+                        logger.info("**** starting indexing thread {}", indexerId);
                         while (!stop.get()) {
                             long id = idGenerator.incrementAndGet();
-                            client("node2").prepareIndex("test", "type1", Long.toString(id))
+                            client("node1").prepareIndex("test", "type1", Long.toString(id))
                                     .setSource(MapBuilder.<String, Object>newMapBuilder().put("test", "value" + id).map()).execute().actionGet();
                         }
+                        logger.info("**** done indexing thread {}", indexerId);
                     } finally {
                         stopLatch.countDown();
                     }
@@ -187,44 +79,234 @@ public class RecoveryWhileUnderLoadTests extends AbstractNodesTests {
             writers[i].start();
         }
 
+        logger.info("--> waiting for 20000 docs to be indexed ...");
         while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 20000) {
             Thread.sleep(100);
             client("node1").admin().indices().prepareRefresh().execute().actionGet();
         }
+        logger.info("--> 20000 docs indexed");
 
+        logger.info("--> flushing the index ....");
         // now flush, just to make sure we have some data in the index, not just translog
         client("node1").admin().indices().prepareFlush().execute().actionGet();
 
 
+        logger.info("--> waiting for 40000 docs to be indexed ...");
         while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 40000) {
             Thread.sleep(100);
             client("node1").admin().indices().prepareRefresh().execute().actionGet();
         }
+        logger.info("--> 40000 docs indexed");
 
-        // now start nore nodes, while we index
-        startNode("node3");
-        startNode("node4");
+        logger.info("--> starting [node2] ...");
+        // now start another node, while we index
+        startNode("node2");
 
+        logger.info("--> waiting for GREEN health status ...");
+        // make sure the cluster state is green, and all has been recovered
         assertThat(client("node1").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
 
-        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 80000) {
+        logger.info("--> waiting for 100000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 100000) {
             Thread.sleep(100);
             client("node1").admin().indices().prepareRefresh().execute().actionGet();
         }
+        logger.info("--> 100000 docs indexed");
 
-        // now, shutdown nodes
-        closeNode("node1");
-        assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
-        closeNode("node3");
-        assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
-        closeNode("node4");
-        assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForYellowStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.YELLOW));
+        logger.info("--> marking and waiting for indexing threads to stop ...");
+        stop.set(true);
+        stopLatch.await();
+        logger.info("--> indexing threads stopped");
+
+        logger.info("--> refreshing the index");
+        client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        logger.info("--> verifying indexed content");
+        for (int i = 0; i < 10; i++) {
+            assertThat(client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(), equalTo(idGenerator.get()));
+        }
+    }
+
+    @Test public void recoverWhileUnderLoadAllocateBackupsRelocatePrimariesTest() throws Exception {
+        logger.info("--> starting [node1] ...");
+        startNode("node1");
+
+        logger.info("--> creating test index ...");
+        client("node1").admin().indices().prepareCreate("test").execute().actionGet();
+
+        final AtomicLong idGenerator = new AtomicLong();
+        final AtomicBoolean stop = new AtomicBoolean(false);
+        Thread[] writers = new Thread[5];
+        logger.info("--> starting {} indexing threads", writers.length);
+        final CountDownLatch stopLatch = new CountDownLatch(writers.length);
+        for (int i = 0; i < writers.length; i++) {
+            final int indexerId = i;
+            writers[i] = new Thread() {
+                @Override public void run() {
+                    try {
+                        logger.info("**** starting indexing thread {}", indexerId);
+                        while (!stop.get()) {
+                            long id = idGenerator.incrementAndGet();
+                            client("node1").prepareIndex("test", "type1", Long.toString(id))
+                                    .setSource(MapBuilder.<String, Object>newMapBuilder().put("test", "value" + id).map()).execute().actionGet();
+                        }
+                        logger.info("**** done indexing thread {}", indexerId);
+                    } finally {
+                        stopLatch.countDown();
+                    }
+                }
+            };
+            writers[i].start();
+        }
+
+        logger.info("--> waiting for 20000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 20000) {
+            Thread.sleep(100);
+            client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        }
+        logger.info("--> 20000 docs indexed");
+
+        logger.info("--> flushing the index ....");
+        // now flush, just to make sure we have some data in the index, not just translog
+        client("node1").admin().indices().prepareFlush().execute().actionGet();
+
+
+        logger.info("--> waiting for 40000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 40000) {
+            Thread.sleep(100);
+            client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        }
+        logger.info("--> 40000 docs indexed");
+
+        logger.info("--> starting [node2] ...");
+        startNode("node2");
+        logger.info("--> starting [node3] ...");
+        startNode("node3");
+        logger.info("--> starting [node4] ...");
+        startNode("node4");
+
+        logger.info("--> waiting for GREEN health status ...");
+        assertThat(client("node1").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
+
+
+        logger.info("--> waiting for 150000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 150000) {
+            Thread.sleep(100);
+            client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        }
+        logger.info("--> 150000 docs indexed");
+
         stop.set(true);
         stopLatch.await();
 
+        logger.info("--> marking and waiting for indexing threads to stop ...");
+        stop.set(true);
+        stopLatch.await();
+        logger.info("--> indexing threads stopped");
+
+        logger.info("--> refreshing the index");
+        client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        logger.info("--> verifying indexed content");
+        for (int i = 0; i < 10; i++) {
+            assertThat(client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(), equalTo(idGenerator.get()));
+        }
+    }
+
+    @Test public void recoverWhileUnderLoadWithNodeShutdown() throws Exception {
+        logger.info("--> starting [node1] ...");
+        startNode("node1");
+        logger.info("--> starting [node2] ...");
+        startNode("node2");
+
+        logger.info("--> creating test index ...");
+        client("node1").admin().indices().prepareCreate("test").execute().actionGet();
+
+        final AtomicLong idGenerator = new AtomicLong();
+        final AtomicBoolean stop = new AtomicBoolean(false);
+        Thread[] writers = new Thread[5];
+        final CountDownLatch stopLatch = new CountDownLatch(writers.length);
+        logger.info("--> starting {} indexing threads", writers.length);
+        for (int i = 0; i < writers.length; i++) {
+            final int indexerId = i;
+            writers[i] = new Thread() {
+                @Override public void run() {
+                    try {
+                        logger.info("**** starting indexing thread {}", indexerId);
+                        while (!stop.get()) {
+                            long id = idGenerator.incrementAndGet();
+                            client("node2").prepareIndex("test", "type1", Long.toString(id))
+                                    .setSource(MapBuilder.<String, Object>newMapBuilder().put("test", "value" + id).map()).execute().actionGet();
+                        }
+                        logger.info("**** done indexing thread {}", indexerId);
+                    } finally {
+                        stopLatch.countDown();
+                    }
+                }
+            };
+            writers[i].start();
+        }
+
+        logger.info("--> waiting for 20000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 20000) {
+            Thread.sleep(100);
+            client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        }
+        logger.info("--> 20000 docs indexed");
+
+        logger.info("--> flushing the index ....");
+        // now flush, just to make sure we have some data in the index, not just translog
+        client("node1").admin().indices().prepareFlush().execute().actionGet();
+
+
+        logger.info("--> waiting for 40000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 40000) {
+            Thread.sleep(100);
+            client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        }
+        logger.info("--> 40000 docs indexed");
+
+        // now start more nodes, while we index
+        logger.info("--> starting [node3] ...");
+        startNode("node3");
+        logger.info("--> starting [node4] ...");
+        startNode("node4");
+
+        logger.info("--> waiting for GREEN health status ...");
+        assertThat(client("node1").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
+
+
+        logger.info("--> waiting for 100000 docs to be indexed ...");
+        while (client("node1").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count() < 100000) {
+            Thread.sleep(100);
+            client("node1").admin().indices().prepareRefresh().execute().actionGet();
+        }
+        logger.info("--> 100000 docs indexed");
+
+        // now, shutdown nodes
+        logger.info("--> shutting down [node1] ...");
+        closeNode("node1");
+        logger.info("--> waiting for GREEN health status ...");
+        assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
+
+        logger.info("--> shutting down [node3] ...");
+        closeNode("node3");
+        logger.info("--> waiting for GREEN health status ...");
+        assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForGreenStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.GREEN));
+
+        logger.info("--> shutting down [node4] ...");
+        closeNode("node4");
+        logger.info("--> waiting for YELLOW health status ...");
         assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForYellowStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.YELLOW));
 
+        logger.info("--> marking and waiting for indexing threads to stop ...");
+        stop.set(true);
+        stopLatch.await();
+        logger.info("--> indexing threads stopped");
+
+        assertThat(client("node2").admin().cluster().prepareHealth().setTimeout("1m").setWaitForYellowStatus().execute().actionGet().status(), equalTo(ClusterHealthStatus.YELLOW));
+
+        logger.info("--> refreshing the index");
+        client("node2").admin().indices().prepareRefresh().execute().actionGet();
+        logger.info("--> verifying indexed content");
         client("node2").admin().indices().prepareRefresh().execute().actionGet();
         for (int i = 0; i < 10; i++) {
             assertThat(client("node2").prepareCount().setQuery(matchAllQuery()).execute().actionGet().count(), equalTo(idGenerator.get()));
Loading…