Additional logging in test framework methods that 'waitFor' something to better trace order of operations when failures occur

This commit is contained in:
Chris Hostetter 2019-08-21 13:13:40 -07:00
parent 8f4103dd4b
commit 2fdc882bc4
6 changed files with 21 additions and 1 deletions

View File

@ -18,6 +18,7 @@ package org.apache.solr.cloud.autoscaling;
import static org.apache.solr.common.util.Utils.makeMap;
import java.lang.invoke.MethodHandles;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@ -45,7 +46,12 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String COLLECTION1 = "testSimple1";
private static final String COLLECTION2 = "testSimple2";
@ -220,6 +226,7 @@ public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
}
private void waitForNodeLeave(String lostNodeName) throws InterruptedException {
log.info("waitForNodeLeave: {}", lostNodeName);
ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
TimeOut timeOut = new TimeOut(20, TimeUnit.SECONDS, TimeSource.NANO_TIME);
while (reader.getClusterState().getLiveNodes().contains(lostNodeName)) {

View File

@ -252,6 +252,7 @@ public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTes
public static void verifyReplicaStatus(ZkStateReader reader, String collection, String shard, String coreNodeName,
Replica.State expectedState) throws InterruptedException, TimeoutException {
log.info("verifyReplicaStatus ({}) shard={} coreNodeName={}", collection, shard, coreNodeName);
reader.waitForState(collection, 15000, TimeUnit.MILLISECONDS,
(collectionState) -> collectionState != null && collectionState.getSlice(shard) != null
&& collectionState.getSlice(shard).getReplicasMap().get(coreNodeName) != null

View File

@ -360,6 +360,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
}
public static void waitForCollection(ZkStateReader reader, String collection, int slices) throws Exception {
log.info("waitForCollection ({}): slices={}", collection, slices);
// wait until shards have started registering...
int cnt = 30;
while (!reader.getClusterState().hasCollection(collection)) {
@ -587,11 +588,12 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
}
protected void waitForLiveNode(JettySolrRunner j) throws InterruptedException, TimeoutException {
log.info("waitForLiveNode: {}", j.getNodeName());
cloudClient.getZkStateReader().waitForLiveNodes(30, TimeUnit.SECONDS, SolrCloudTestCase.containsLiveNode(j.getNodeName()));
}
protected void waitForActiveReplicaCount(CloudSolrClient client, String collection, int expectedNumReplicas) throws TimeoutException, NotInClusterStateException {
log.debug("Waiting to see {} active replicas in collection: {}", expectedNumReplicas, collection);
log.info("Waiting to see {} active replicas in collection: {}", expectedNumReplicas, collection);
AtomicInteger nReplicas = new AtomicInteger();
try {
client.getZkStateReader().waitForState(collection, 30, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
@ -2240,6 +2242,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
}
protected void waitForReplicationFromReplicas(String collectionName, ZkStateReader zkStateReader, TimeOut timeout) throws KeeperException, InterruptedException, IOException {
log.info("waitForReplicationFromReplicas: {}", collectionName);
zkStateReader.forceUpdateCollection(collectionName);
DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
Map<String, CoreContainer> containers = new HashMap<>();
@ -2304,6 +2307,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
}
protected void waitForAllWarmingSearchers() throws InterruptedException {
log.info("waitForAllWarmingSearchers");
for (JettySolrRunner jetty:jettys) {
if (!jetty.isRunning()) {
continue;

View File

@ -323,6 +323,7 @@ public class MiniSolrCloudCluster {
}
private void waitForAllNodes(int numServers, int timeoutSeconds) throws IOException, InterruptedException, TimeoutException {
log.info("waitForAllNodes: numServers={}", numServers);
int numRunning = 0;
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
@ -352,6 +353,7 @@ public class MiniSolrCloudCluster {
public void waitForNode(JettySolrRunner jetty, int timeoutSeconds)
throws IOException, InterruptedException, TimeoutException {
log.info("waitForNode: {}", jetty.getNodeName());
ZkStateReader reader = getSolrClient().getZkStateReader();
@ -742,6 +744,7 @@ public class MiniSolrCloudCluster {
}
public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) {
log.info("waitForActiveCollection: {}", collection);
CollectionStatePredicate predicate = expectedShardsAndActiveReplicas(shards, totalReplicas);
AtomicReference<DocCollection> state = new AtomicReference<>();
@ -789,6 +792,7 @@ public class MiniSolrCloudCluster {
}
public void waitForJettyToStop(JettySolrRunner runner) throws TimeoutException {
log.info("waitForJettyToStop: {}", runner.getNodeName());
TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS, TimeSource.NANO_TIME);
while(!timeout.hasTimedOut()) {
if (runner.isStopped()) {

View File

@ -316,6 +316,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
* @param predicate a predicate to match against the collection state
*/
protected static void waitForState(String message, String collection, CollectionStatePredicate predicate, int timeout, TimeUnit timeUnit) {
log.info("waitForState ({}): {}", collection, message);
AtomicReference<DocCollection> state = new AtomicReference<>();
AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
try {
@ -455,6 +456,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
}
protected NamedList waitForResponse(Predicate<NamedList> predicate, SolrRequest request, int intervalInMillis, int numRetries, String messageOnFail) {
log.info("waitForResponse: {}", request);
int i = 0;
for (; i < numRetries; i++) {
try {

View File

@ -639,6 +639,7 @@ public class ZkTestServer {
}
public static boolean waitForServerDown(String hp, long timeoutMs) {
log.info("waitForServerDown: {}", hp);
final TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
while (true) {
try {
@ -660,6 +661,7 @@ public class ZkTestServer {
}
public static boolean waitForServerUp(String hp, long timeoutMs) {
log.info("waitForServerUp: {}", hp);
final TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
while (true) {
try {