mirror of https://github.com/apache/lucene.git
SOLR-14159: Eliminate some 'spin loops' in tests that may be contributing factors to odd test failures
(cherry picked from commit 6b3e7feba1)
This commit is contained in:
parent
1f2212c8d8
commit
e0820acc45
|
@ -25,6 +25,7 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
import org.apache.solr.JSONTestUtil;
|
import org.apache.solr.JSONTestUtil;
|
||||||
import org.apache.solr.client.solrj.SolrServerException;
|
import org.apache.solr.client.solrj.SolrServerException;
|
||||||
|
@ -37,8 +38,6 @@ import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.common.cloud.Replica;
|
import org.apache.solr.common.cloud.Replica;
|
||||||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.common.util.TimeSource;
|
|
||||||
import org.apache.solr.util.TimeOut;
|
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -168,14 +167,22 @@ public class TestCloudConsistency extends SolrCloudTestCase {
|
||||||
|
|
||||||
cluster.waitForNode(j1, 30);
|
cluster.waitForNode(j1, 30);
|
||||||
cluster.waitForNode(j2, 30);
|
cluster.waitForNode(j2, 30);
|
||||||
|
|
||||||
TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
// the meat of the test -- wait to see if a different replica becomes the leader
|
||||||
while (!timeOut.hasTimedOut()) {
|
// the correct behavior is that this should time out, if it succeeds we have a problem...
|
||||||
Replica newLeader = getCollectionState(collection).getSlice("shard1").getLeader();
|
expectThrows(TimeoutException.class,
|
||||||
if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
|
"Did not time out waiting for new leader, out of sync replica became leader",
|
||||||
fail("Out of sync replica became leader " + newLeader);
|
() -> {
|
||||||
}
|
cluster.getSolrClient().waitForState(collection, 10, TimeUnit.SECONDS, (state) -> {
|
||||||
}
|
Replica newLeader = state.getSlice("shard1").getLeader();
|
||||||
|
if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
|
||||||
|
// this is the bad case: our "bad" state was found before timeout
|
||||||
|
log.error("WTF: New Leader={}", newLeader);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false; // still no bad state, wait for timeout
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
JettySolrRunner j0 = cluster.getJettySolrRunner(0);
|
JettySolrRunner j0 = cluster.getJettySolrRunner(0);
|
||||||
j0.start();
|
j0.start();
|
||||||
|
@ -211,13 +218,21 @@ public class TestCloudConsistency extends SolrCloudTestCase {
|
||||||
waitForState("Timeout waiting for leader goes DOWN", collection, (liveNodes, collectionState)
|
waitForState("Timeout waiting for leader goes DOWN", collection, (liveNodes, collectionState)
|
||||||
-> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
|
-> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
|
||||||
|
|
||||||
TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
// the meat of the test -- wait to see if a different replica becomes the leader
|
||||||
while (!timeOut.hasTimedOut()) {
|
// the correct behavior is that this should time out, if it succeeds we have a problem...
|
||||||
Replica newLeader = getCollectionState(collection).getLeader("shard1");
|
expectThrows(TimeoutException.class,
|
||||||
if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
|
"Did not time out waiting for new leader, out of sync replica became leader",
|
||||||
fail("Out of sync replica became leader " + newLeader);
|
() -> {
|
||||||
}
|
cluster.getSolrClient().waitForState(collection, 10, TimeUnit.SECONDS, (state) -> {
|
||||||
}
|
Replica newLeader = state.getSlice("shard1").getLeader();
|
||||||
|
if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
|
||||||
|
// this is the bad case: our "bad" state was found before timeout
|
||||||
|
log.error("WTF: New Leader={}", newLeader);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false; // still no bad state, wait for timeout
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
proxies.get(cluster.getJettySolrRunner(0)).reopen();
|
proxies.get(cluster.getJettySolrRunner(0)).reopen();
|
||||||
cluster.getJettySolrRunner(0).start();
|
cluster.getJettySolrRunner(0).start();
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.TimeoutException;
|
||||||
|
|
||||||
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
|
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
|
||||||
|
|
||||||
|
@ -39,9 +40,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.client.solrj.response.RequestStatusState;
|
import org.apache.solr.client.solrj.response.RequestStatusState;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.common.cloud.Replica;
|
import org.apache.solr.common.cloud.Replica;
|
||||||
import org.apache.solr.common.util.TimeSource;
|
|
||||||
import org.apache.solr.util.TestInjection;
|
import org.apache.solr.util.TestInjection;
|
||||||
import org.apache.solr.util.TimeOut;
|
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -172,13 +171,20 @@ public class TestTlogReplayVsRecovery extends SolrCloudTestCase {
|
||||||
waitForState("Timeout waiting for leader goes DOWN", COLLECTION, (liveNodes, collectionState)
|
waitForState("Timeout waiting for leader goes DOWN", COLLECTION, (liveNodes, collectionState)
|
||||||
-> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
|
-> collectionState.getReplica(leader.getName()).getState() == Replica.State.DOWN);
|
||||||
|
|
||||||
TimeOut timeOut = new TimeOut(10, TimeUnit.SECONDS, TimeSource.NANO_TIME);
|
// Sanity check that a new (out of sync) replica doesn't come up in our place...
|
||||||
while (!timeOut.hasTimedOut()) {
|
expectThrows(TimeoutException.class,
|
||||||
Replica newLeader = getCollectionState(COLLECTION).getLeader("shard1");
|
"Did not time out waiting for new leader, out of sync replica became leader",
|
||||||
if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
|
() -> {
|
||||||
fail("Out of sync replica became leader " + newLeader);
|
cluster.getSolrClient().waitForState(COLLECTION, 10, TimeUnit.SECONDS, (state) -> {
|
||||||
}
|
Replica newLeader = state.getSlice("shard1").getLeader();
|
||||||
}
|
if (newLeader != null && !newLeader.getName().equals(leader.getName()) && newLeader.getState() == Replica.State.ACTIVE) {
|
||||||
|
// this is the bad case: our "bad" state was found before timeout
|
||||||
|
log.error("WTF: New Leader={}", newLeader);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false; // still no bad state, wait for timeout
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
log.info("Enabling TestInjection.updateLogReplayRandomPause");
|
log.info("Enabling TestInjection.updateLogReplayRandomPause");
|
||||||
TestInjection.updateLogReplayRandomPause = "true:100";
|
TestInjection.updateLogReplayRandomPause = "true:100";
|
||||||
|
|
Loading…
Reference in New Issue