Additional logging in test framework methods that 'waitFor' something to better trace order of operations when failures occur

This commit is contained in:
Chris Hostetter 2019-08-21 13:13:40 -07:00
parent 8f4103dd4b
commit 2fdc882bc4
6 changed files with 21 additions and 1 deletions

View File

@ -18,6 +18,7 @@ package org.apache.solr.cloud.autoscaling;
import static org.apache.solr.common.util.Utils.makeMap; import static org.apache.solr.common.util.Utils.makeMap;
import java.lang.invoke.MethodHandles;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@ -45,7 +46,12 @@ import org.junit.After;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase { public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final String COLLECTION1 = "testSimple1"; private static final String COLLECTION1 = "testSimple1";
private static final String COLLECTION2 = "testSimple2"; private static final String COLLECTION2 = "testSimple2";
@ -220,6 +226,7 @@ public class AutoAddReplicasIntegrationTest extends SolrCloudTestCase {
} }
private void waitForNodeLeave(String lostNodeName) throws InterruptedException { private void waitForNodeLeave(String lostNodeName) throws InterruptedException {
log.info("waitForNodeLeave: {}", lostNodeName);
ZkStateReader reader = cluster.getSolrClient().getZkStateReader(); ZkStateReader reader = cluster.getSolrClient().getZkStateReader();
TimeOut timeOut = new TimeOut(20, TimeUnit.SECONDS, TimeSource.NANO_TIME); TimeOut timeOut = new TimeOut(20, TimeUnit.SECONDS, TimeSource.NANO_TIME);
while (reader.getClusterState().getLiveNodes().contains(lostNodeName)) { while (reader.getClusterState().getLiveNodes().contains(lostNodeName)) {

View File

@ -252,6 +252,7 @@ public abstract class AbstractDistribZkTestBase extends BaseDistributedSearchTes
public static void verifyReplicaStatus(ZkStateReader reader, String collection, String shard, String coreNodeName, public static void verifyReplicaStatus(ZkStateReader reader, String collection, String shard, String coreNodeName,
Replica.State expectedState) throws InterruptedException, TimeoutException { Replica.State expectedState) throws InterruptedException, TimeoutException {
log.info("verifyReplicaStatus ({}) shard={} coreNodeName={}", collection, shard, coreNodeName);
reader.waitForState(collection, 15000, TimeUnit.MILLISECONDS, reader.waitForState(collection, 15000, TimeUnit.MILLISECONDS,
(collectionState) -> collectionState != null && collectionState.getSlice(shard) != null (collectionState) -> collectionState != null && collectionState.getSlice(shard) != null
&& collectionState.getSlice(shard).getReplicasMap().get(coreNodeName) != null && collectionState.getSlice(shard).getReplicasMap().get(coreNodeName) != null

View File

@ -360,6 +360,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
} }
public static void waitForCollection(ZkStateReader reader, String collection, int slices) throws Exception { public static void waitForCollection(ZkStateReader reader, String collection, int slices) throws Exception {
log.info("waitForCollection ({}): slices={}", collection, slices);
// wait until shards have started registering... // wait until shards have started registering...
int cnt = 30; int cnt = 30;
while (!reader.getClusterState().hasCollection(collection)) { while (!reader.getClusterState().hasCollection(collection)) {
@ -587,11 +588,12 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
} }
protected void waitForLiveNode(JettySolrRunner j) throws InterruptedException, TimeoutException { protected void waitForLiveNode(JettySolrRunner j) throws InterruptedException, TimeoutException {
log.info("waitForLiveNode: {}", j.getNodeName());
cloudClient.getZkStateReader().waitForLiveNodes(30, TimeUnit.SECONDS, SolrCloudTestCase.containsLiveNode(j.getNodeName())); cloudClient.getZkStateReader().waitForLiveNodes(30, TimeUnit.SECONDS, SolrCloudTestCase.containsLiveNode(j.getNodeName()));
} }
protected void waitForActiveReplicaCount(CloudSolrClient client, String collection, int expectedNumReplicas) throws TimeoutException, NotInClusterStateException { protected void waitForActiveReplicaCount(CloudSolrClient client, String collection, int expectedNumReplicas) throws TimeoutException, NotInClusterStateException {
log.debug("Waiting to see {} active replicas in collection: {}", expectedNumReplicas, collection); log.info("Waiting to see {} active replicas in collection: {}", expectedNumReplicas, collection);
AtomicInteger nReplicas = new AtomicInteger(); AtomicInteger nReplicas = new AtomicInteger();
try { try {
client.getZkStateReader().waitForState(collection, 30, TimeUnit.SECONDS, (liveNodes, collectionState) -> { client.getZkStateReader().waitForState(collection, 30, TimeUnit.SECONDS, (liveNodes, collectionState) -> {
@ -2240,6 +2242,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
} }
protected void waitForReplicationFromReplicas(String collectionName, ZkStateReader zkStateReader, TimeOut timeout) throws KeeperException, InterruptedException, IOException { protected void waitForReplicationFromReplicas(String collectionName, ZkStateReader zkStateReader, TimeOut timeout) throws KeeperException, InterruptedException, IOException {
log.info("waitForReplicationFromReplicas: {}", collectionName);
zkStateReader.forceUpdateCollection(collectionName); zkStateReader.forceUpdateCollection(collectionName);
DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName); DocCollection collection = zkStateReader.getClusterState().getCollection(collectionName);
Map<String, CoreContainer> containers = new HashMap<>(); Map<String, CoreContainer> containers = new HashMap<>();
@ -2304,6 +2307,7 @@ public abstract class AbstractFullDistribZkTestBase extends AbstractDistribZkTes
} }
protected void waitForAllWarmingSearchers() throws InterruptedException { protected void waitForAllWarmingSearchers() throws InterruptedException {
log.info("waitForAllWarmingSearchers");
for (JettySolrRunner jetty:jettys) { for (JettySolrRunner jetty:jettys) {
if (!jetty.isRunning()) { if (!jetty.isRunning()) {
continue; continue;

View File

@ -323,6 +323,7 @@ public class MiniSolrCloudCluster {
} }
private void waitForAllNodes(int numServers, int timeoutSeconds) throws IOException, InterruptedException, TimeoutException { private void waitForAllNodes(int numServers, int timeoutSeconds) throws IOException, InterruptedException, TimeoutException {
log.info("waitForAllNodes: numServers={}", numServers);
int numRunning = 0; int numRunning = 0;
TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME); TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
@ -352,6 +353,7 @@ public class MiniSolrCloudCluster {
public void waitForNode(JettySolrRunner jetty, int timeoutSeconds) public void waitForNode(JettySolrRunner jetty, int timeoutSeconds)
throws IOException, InterruptedException, TimeoutException { throws IOException, InterruptedException, TimeoutException {
log.info("waitForNode: {}", jetty.getNodeName());
ZkStateReader reader = getSolrClient().getZkStateReader(); ZkStateReader reader = getSolrClient().getZkStateReader();
@ -742,6 +744,7 @@ public class MiniSolrCloudCluster {
} }
public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) { public void waitForActiveCollection(String collection, long wait, TimeUnit unit, int shards, int totalReplicas) {
log.info("waitForActiveCollection: {}", collection);
CollectionStatePredicate predicate = expectedShardsAndActiveReplicas(shards, totalReplicas); CollectionStatePredicate predicate = expectedShardsAndActiveReplicas(shards, totalReplicas);
AtomicReference<DocCollection> state = new AtomicReference<>(); AtomicReference<DocCollection> state = new AtomicReference<>();
@ -789,6 +792,7 @@ public class MiniSolrCloudCluster {
} }
public void waitForJettyToStop(JettySolrRunner runner) throws TimeoutException { public void waitForJettyToStop(JettySolrRunner runner) throws TimeoutException {
log.info("waitForJettyToStop: {}", runner.getNodeName());
TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS, TimeSource.NANO_TIME); TimeOut timeout = new TimeOut(15, TimeUnit.SECONDS, TimeSource.NANO_TIME);
while(!timeout.hasTimedOut()) { while(!timeout.hasTimedOut()) {
if (runner.isStopped()) { if (runner.isStopped()) {

View File

@ -316,6 +316,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
* @param predicate a predicate to match against the collection state * @param predicate a predicate to match against the collection state
*/ */
protected static void waitForState(String message, String collection, CollectionStatePredicate predicate, int timeout, TimeUnit timeUnit) { protected static void waitForState(String message, String collection, CollectionStatePredicate predicate, int timeout, TimeUnit timeUnit) {
log.info("waitForState ({}): {}", collection, message);
AtomicReference<DocCollection> state = new AtomicReference<>(); AtomicReference<DocCollection> state = new AtomicReference<>();
AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>(); AtomicReference<Set<String>> liveNodesLastSeen = new AtomicReference<>();
try { try {
@ -455,6 +456,7 @@ public class SolrCloudTestCase extends SolrTestCaseJ4 {
} }
protected NamedList waitForResponse(Predicate<NamedList> predicate, SolrRequest request, int intervalInMillis, int numRetries, String messageOnFail) { protected NamedList waitForResponse(Predicate<NamedList> predicate, SolrRequest request, int intervalInMillis, int numRetries, String messageOnFail) {
log.info("waitForResponse: {}", request);
int i = 0; int i = 0;
for (; i < numRetries; i++) { for (; i < numRetries; i++) {
try { try {

View File

@ -639,6 +639,7 @@ public class ZkTestServer {
} }
public static boolean waitForServerDown(String hp, long timeoutMs) { public static boolean waitForServerDown(String hp, long timeoutMs) {
log.info("waitForServerDown: {}", hp);
final TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); final TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
while (true) { while (true) {
try { try {
@ -660,6 +661,7 @@ public class ZkTestServer {
} }
public static boolean waitForServerUp(String hp, long timeoutMs) { public static boolean waitForServerUp(String hp, long timeoutMs) {
log.info("waitForServerUp: {}", hp);
final TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME); final TimeOut timeout = new TimeOut(timeoutMs, TimeUnit.MILLISECONDS, TimeSource.NANO_TIME);
while (true) { while (true) {
try { try {