SOLR-13045: Harden TestSimPolicyCloud

This commit fixes a race condition in SimClusterStateProvider, fixing
several failures in TestSimPolicyCloud.
This commit is contained in:
Jason Gerlowski 2018-12-07 11:07:42 -05:00
parent af6e15e838
commit f89f109ec1
4 changed files with 75 additions and 74 deletions

View File

@ -838,7 +838,7 @@ public class SimCloudManager implements SolrCloudManager {
results.add("success", "");
break;
case ADDROLE:
nodeStateProvider.simAddNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
nodeStateProvider.simSetNodeValue(req.getParams().get("node"), "nodeRole", req.getParams().get("role"));
break;
case CREATESHARD:
try {

View File

@ -691,6 +691,8 @@ public class SimClusterStateProvider implements ClusterStateProvider {
private void simRunLeaderElection(String collection, Slice s, boolean saveState) throws Exception {
AtomicBoolean stateChanged = new AtomicBoolean(Boolean.FALSE);
lock.lockInterruptibly();
try {
Replica leader = s.getLeader();
if (leader == null || !liveNodes.contains(leader.getNodeName())) {
log.trace("Running leader election for {} / {}", collection, s.getName());
@ -762,9 +764,12 @@ public class SimClusterStateProvider implements ClusterStateProvider {
} else {
log.trace("-- already has leader for {} / {}", collection, s.getName());
}
} finally {
if (stateChanged.get() || saveState) {
collectionsStatesRef.set(null);
}
lock.unlock();
}
}
/**

View File

@ -23,7 +23,6 @@ import java.util.concurrent.TimeUnit;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@ -52,7 +51,6 @@ import static org.apache.solr.cloud.autoscaling.AutoScalingHandlerTest.createAut
@TimeoutSuite(millis = 48 * 3600 * 1000)
@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.ComputePlanAction=INFO;org.apache.solr.cloud.autoscaling.ExecutePlanAction=DEBUG;org.apache.solr.cloud.autoscaling.ScheduledTriggers=DEBUG")
//@LogLevel("org.apache.solr.cloud.autoscaling=DEBUG;org.apache.solr.cloud.autoscaling.NodeLostTrigger=INFO;org.apache.client.solrj.cloud.autoscaling=DEBUG;org.apache.solr.cloud.CloudTestUtils=TRACE")
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // this test can fail to elect a leader, seems to be common among sim tests
public class TestSimExtremeIndexing extends SimSolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

View File

@ -108,7 +108,6 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
}
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
public void testCreateCollectionAddReplica() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();
@ -135,7 +134,6 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
getCollectionState(collectionName).forEachReplica((s, replica) -> assertEquals(nodeId, replica.getNodeName()));
}
@AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028")
public void testCreateCollectionSplitShard() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String firstNode = cluster.getSimClusterStateProvider().simGetRandomNode();
@ -294,7 +292,7 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
assertEquals(3, coll.getSlice("s3").getReplicas().size());
coll.forEachReplica(verifyReplicas);
}
@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 14-Oct-2018
public void testCreateCollectionAddShardUsingPolicy() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String nodeId = cluster.getSimClusterStateProvider().simGetRandomNode();